From 397e0ec2cc1bcde3d73b4e884de01e3fb54e0207 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 24 Nov 2017 17:36:01 -0800 Subject: [PATCH 0001/2939] Add DT_HALF support for SpaceToDepth on GPU This fix tries to address the issue raised in 14871 where there was no DT_HALF support for SpaceToDepth on GPU. This fix adds DT_HALF support on GPU and adds additional test cases. This fix fixes 14871. Signed-off-by: Yong Tang --- tensorflow/core/kernels/spacetodepth_op.cc | 3 +++ tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/tensorflow/core/kernels/spacetodepth_op.cc b/tensorflow/core/kernels/spacetodepth_op.cc index 23df1c35e5..d93a2a9bad 100644 --- a/tensorflow/core/kernels/spacetodepth_op.cc +++ b/tensorflow/core/kernels/spacetodepth_op.cc @@ -187,6 +187,9 @@ TF_CALL_ALL_TYPES(REGISTER); REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); +REGISTER_KERNEL_BUILDER( + Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), + SpaceToDepthOp); REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); diff --git a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc index a1a01e8813..e841472972 100644 --- a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc +++ b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc @@ -225,6 +225,10 @@ struct SpaceToDepthOpFunctor { template struct functor::SpaceToDepthOpFunctor; template struct functor::SpaceToDepthOpFunctor; +// Instantiate the GPU implementations for Eigen::Half. +template struct functor::SpaceToDepthOpFunctor; +template struct functor::SpaceToDepthOpFunctor; + // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. 
template struct functor::SpaceToDepthOpFunctor; -- GitLab From 1d77785e9e13241cb318edce4661e0bdc2dd3095 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 24 Nov 2017 17:37:27 -0800 Subject: [PATCH 0002/2939] Add test cases for DT_HALF support for SpaceToDepth on GPU. Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/spacetodepth_op_test.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/spacetodepth_op_test.py b/tensorflow/python/kernel_tests/spacetodepth_op_test.py index 3c98a685e0..4af0e6f9db 100644 --- a/tensorflow/python/kernel_tests/spacetodepth_op_test.py +++ b/tensorflow/python/kernel_tests/spacetodepth_op_test.py @@ -34,8 +34,8 @@ from tensorflow.python.platform import tf_logging class SpaceToDepthTest(test.TestCase): - def _testOne(self, inputs, block_size, outputs): - input_nhwc = math_ops.to_float(inputs) + def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32): + input_nhwc = math_ops.cast(inputs, dtype) with self.test_session(use_gpu=False): # test NHWC (default) on CPU x_tf = array_ops.space_to_depth(input_nhwc, block_size) @@ -58,6 +58,12 @@ class SpaceToDepthTest(test.TestCase): x_out = [[[[1, 2, 3, 4]]]] self._testOne(x_np, block_size, x_out) + def testBasicFloat16(self): + x_np = [[[[1], [2]], [[3], [4]]]] + block_size = 2 + x_out = [[[[1, 2, 3, 4]]]] + self._testOne(x_np, block_size, x_out, dtype=dtypes.float16) + # Tests for larger input dimensions. To make sure elements are # correctly ordered spatially. 
def testLargerInput2x2(self): -- GitLab From 3e6edce1f41a79ca83358b14af9230826e871b66 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 24 Nov 2017 17:50:04 -0800 Subject: [PATCH 0003/2939] Address `Eigen::Half` -> `Eigen::half` Signed-off-by: Yong Tang --- tensorflow/core/kernels/spacetodepth_op.cc | 4 ++-- tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/spacetodepth_op.cc b/tensorflow/core/kernels/spacetodepth_op.cc index d93a2a9bad..e59adfc6ac 100644 --- a/tensorflow/core/kernels/spacetodepth_op.cc +++ b/tensorflow/core/kernels/spacetodepth_op.cc @@ -188,8 +188,8 @@ REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); REGISTER_KERNEL_BUILDER( - Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), - SpaceToDepthOp); + Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), + SpaceToDepthOp); REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); diff --git a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc index e841472972..8466fa192f 100644 --- a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc +++ b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc @@ -225,9 +225,9 @@ struct SpaceToDepthOpFunctor { template struct functor::SpaceToDepthOpFunctor; template struct functor::SpaceToDepthOpFunctor; -// Instantiate the GPU implementations for Eigen::Half. -template struct functor::SpaceToDepthOpFunctor; -template struct functor::SpaceToDepthOpFunctor; +// Instantiate the GPU implementations for Eigen::half. +template struct functor::SpaceToDepthOpFunctor; +template struct functor::SpaceToDepthOpFunctor; // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. 
template struct functor::SpaceToDepthOpFunctor; -- GitLab From 17b982cad07799feeb00614b0faeba4cf95474c2 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 25 Nov 2017 17:33:43 -0800 Subject: [PATCH 0004/2939] Add DT_HALF support for DepthToSpace on GPU Signed-off-by: Yong Tang --- tensorflow/core/kernels/depthtospace_op.cc | 3 +++ tensorflow/core/kernels/depthtospace_op_gpu.cu.cc | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/tensorflow/core/kernels/depthtospace_op.cc b/tensorflow/core/kernels/depthtospace_op.cc index 39aa3e9eb0..b74a09e2cb 100644 --- a/tensorflow/core/kernels/depthtospace_op.cc +++ b/tensorflow/core/kernels/depthtospace_op.cc @@ -187,6 +187,9 @@ TF_CALL_ALL_TYPES(REGISTER); REGISTER_KERNEL_BUILDER( Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), DepthToSpaceOp); +REGISTER_KERNEL_BUILDER( + Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), + DepthToSpaceOp); REGISTER_KERNEL_BUILDER( Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), DepthToSpaceOp); diff --git a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc index 7a66285383..2d39abce16 100644 --- a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc @@ -229,6 +229,10 @@ struct DepthToSpaceOpFunctor { template struct functor::DepthToSpaceOpFunctor; template struct functor::DepthToSpaceOpFunctor; +// Instantiate the GPU implementations for Eigen::half. +template struct functor::DepthToSpaceOpFunctor; +template struct functor::DepthToSpaceOpFunctor; + // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. template struct functor::DepthToSpaceOpFunctor; -- GitLab From 1100256692a2b130f3ef2b4e36cd5b63241672ce Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 25 Nov 2017 17:34:14 -0800 Subject: [PATCH 0005/2939] Add test cases for DT_HALF support with DepthToSpace on GPU. 
Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/depthtospace_op_test.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/depthtospace_op_test.py b/tensorflow/python/kernel_tests/depthtospace_op_test.py index 7df2366954..f03ad85f17 100644 --- a/tensorflow/python/kernel_tests/depthtospace_op_test.py +++ b/tensorflow/python/kernel_tests/depthtospace_op_test.py @@ -35,8 +35,8 @@ from tensorflow.python.platform import tf_logging class DepthToSpaceTest(test.TestCase): - def _testOne(self, inputs, block_size, outputs): - input_nhwc = math_ops.to_float(inputs) + def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32): + input_nhwc = math_ops.cast(inputs, dtype) with self.test_session(use_gpu=False): # test NHWC (default) on CPU x_tf = array_ops.depth_to_space(input_nhwc, block_size) @@ -59,6 +59,12 @@ class DepthToSpaceTest(test.TestCase): x_out = [[[[1], [2]], [[3], [4]]]] self._testOne(x_np, block_size, x_out) + def testBasicFloat16(self): + x_np = [[[[1, 2, 3, 4]]]] + block_size = 2 + x_out = [[[[1], [2]], [[3], [4]]]] + self._testOne(x_np, block_size, x_out, dtype=dtypes.float16) + # Tests for larger input dimensions. To make sure elements are # correctly ordered spatially. 
def testBlockSize2(self): -- GitLab From 7a590cd8ea21ae085845efc6d9b1724d42800659 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 19 Jan 2018 19:13:43 -0800 Subject: [PATCH 0006/2939] Turn the op_performance_data proto lib into a header only library by default PiperOrigin-RevId: 182621348 Signed-off-by: Jie --- tensorflow/core/BUILD | 6 +++-- tensorflow/core/grappler/costs/BUILD | 24 +++++++++---------- .../core/platform/default/build_config.bzl | 8 +++++++ tensorflow/python/BUILD | 4 ++-- 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 579174efa3..f2f66fc567 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -136,6 +136,8 @@ load( "tf_nano_proto_library", "tf_protos_all", "tf_protos_all_impl", + "tf_protos_grappler", + "tf_protos_grappler_impl", ) load( "//tensorflow/core:platform/default/build_config_root.bzl", @@ -1529,7 +1531,7 @@ cc_library( "@snappy", "@zlib_archive//:zlib", "@protobuf_archive//:protobuf", - ] + tf_protos_all_impl(), + ] + tf_protos_all_impl() + tf_protos_grappler_impl(), ) # File compiled with extra flags to get cpu-specific acceleration. 
@@ -2094,7 +2096,7 @@ tf_cuda_library( ":core_cpu_base", ":proto_text", "//tensorflow/core/grappler:grappler_item", - ] + if_static([":core_cpu_impl"]) + tf_protos_all(), + ] + if_static([":core_cpu_impl"]) + tf_protos_all() + tf_protos_grappler(), ) tf_cuda_library( diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index 7abc155c19..0fe01e9c9e 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -1,6 +1,10 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cuda_library", "tf_cc_test") +load( + "//tensorflow/core:platform/default/build_config.bzl", + "tf_protos_grappler", +) filegroup( name = "all_files", @@ -37,6 +41,7 @@ tf_proto_library( name = "op_performance_data", srcs = ["op_performance_data.proto"], cc_api_version = 2, + default_header = True, protodeps = tf_additional_all_protos(), visibility = ["//visibility:public"], ) @@ -47,7 +52,6 @@ cc_library( hdrs = ["graph_properties.h"], visibility = ["//visibility:public"], deps = [ - ":op_performance_data_cc", ":utils", "//tensorflow/core:core_cpu_base", "//tensorflow/core:framework", @@ -55,7 +59,7 @@ cc_library( "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/clusters:cluster", - ], + ] + tf_protos_grappler(), ) tf_cc_test( @@ -135,7 +139,7 @@ tf_cuda_library( hdrs = ["utils.h"], visibility = ["//visibility:public"], deps = [ - ":op_performance_data_cc", + "//third_party/eigen3", "//tensorflow/core:framework", "//tensorflow/core:graph", "//tensorflow/core:lib", @@ -143,8 +147,7 @@ tf_cuda_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/clusters:utils", - "//third_party/eigen3", - ], + ] + tf_protos_grappler(), ) tf_cc_test( @@ -207,9 +210,8 @@ cc_library( hdrs = ["op_context.h"], visibility = ["//visibility:public"], deps = [ - ":op_performance_data_cc", 
"//tensorflow/core:protos_all_cc", - ], + ] + tf_protos_grappler(), ) cc_library( @@ -276,12 +278,11 @@ cc_library( deps = [ ":cost_estimator", ":op_context", - ":op_performance_data_cc", + "//third_party/eigen3", "//tensorflow/core:framework", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler/clusters:utils", - "//third_party/eigen3", - ], + ] + tf_protos_grappler(), ) tf_cc_test( @@ -305,7 +306,6 @@ cc_library( ":cost_estimator", ":graph_properties", ":op_level_cost_estimator", - ":op_performance_data_cc", ":utils", ":virtual_placer", ":virtual_scheduler", @@ -314,7 +314,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:grappler_item", - ], + ] + tf_protos_grappler(), ) tf_cc_test( diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index e9c510c93c..2102c5cca3 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -378,6 +378,14 @@ def tf_protos_all(): extra_deps=tf_protos_all_impl(), otherwise=["//tensorflow/core:protos_all_cc"]) +def tf_protos_grappler_impl(): + return ["//tensorflow/core/grappler/costs:op_performance_data_cc_impl"] + +def tf_protos_grappler(): + return if_static( + extra_deps=tf_protos_grappler_impl(), + otherwise=["//tensorflow/core/grappler/costs:op_performance_data_cc"]) + def tf_env_time_hdrs(): return [ "platform/env_time.h", diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 3493ed76f3..dbb29d9878 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -32,6 +32,7 @@ load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library_py") load("//tensorflow/core:platform/default/build_config.bzl", "tf_additional_lib_deps") load("//tensorflow/core:platform/default/build_config.bzl", "tf_additional_all_protos") 
+load("//tensorflow/core:platform/default/build_config.bzl", "tf_protos_grappler") load("//tensorflow/core:platform/default/build_config_root.bzl", "tf_additional_plugin_deps") load("//tensorflow/python:build_defs.bzl", "tf_gen_op_wrapper_private_py") load("//tensorflow/core:platform/default/build_config_root.bzl", "tf_additional_verbs_deps") @@ -209,9 +210,8 @@ cc_library( "//tensorflow/core/grappler/costs:analytical_cost_estimator", "//tensorflow/core/grappler/costs:cost_estimator", "//tensorflow/core/grappler/costs:measuring_cost_estimator", - "//tensorflow/core/grappler/costs:op_performance_data_cc", "//tensorflow/core/grappler/costs:utils", - ], + ] + tf_protos_grappler(), ) cc_library( -- GitLab From 550a8fa4e9a29bde527730eb45bcbfb7e9067436 Mon Sep 17 00:00:00 2001 From: Jie Date: Mon, 22 Jan 2018 18:07:49 -0800 Subject: [PATCH 0007/2939] [Update] Refactor optimization pass through grappler tensorflow fixed dependency issues in core/grappler/constant_folding removed python calls for optimization(layout/constfold), moved optimization to convert_graph.cc bug: dependency issue with //tensorflow/core/grappler/clusters:single_machine TODO: shape inference through grappler. cluster for optimization pass. 
--- tensorflow/contrib/tensorrt/BUILD | 6 +- .../contrib/tensorrt/convert/convert_graph.cc | 56 +++++++++++++++++-- .../contrib/tensorrt/python/trt_convert.py | 36 ++++++------ 3 files changed, 76 insertions(+), 22 deletions(-) diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 723c9f5434..1cb916e4c3 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -192,7 +192,11 @@ cc_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core:framework_headers_lib", "//tensorflow/core:core_cpu_base", - #"//third_party/eigen3", + "//tensorflow/core/grappler/optimizers:constant_folding", + "//tensorflow/core/grappler/optimizers:layout_optimizer", + "//tensorflow/core/grappler/clusters:virtual_cluster", + "//tensorflow/core/grappler:devices", + #"//tensorflow/core/grappler/clusters:single_machine", ], ) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 29aa555467..c1948c8144 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -40,6 +40,15 @@ limitations under the License. 
#include "tensorflow/core/platform/logging.h" #define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1) +#include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include "tensorflow/core/grappler/optimizers/layout_optimizer.h" +#include "tensorflow/core/grappler/devices.h" +//#include "tensorflow/core/grappler/clusters/single_machine.h" +#include "tensorflow/core/grappler/clusters/virtual_cluster.h" +#include "tensorflow/core/protobuf/device_properties.pb.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/utils.h" + //------------------------------------------------------------------------------ namespace tensorrt { namespace convert { @@ -199,9 +208,48 @@ tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, size_t max_workspace_size, tensorflow::GraphDef* new_graph_def) { + + // optimization pass + tensorflow::grappler::GrapplerItem item; + item.fetch = output_names; + tensorflow::GraphDef gdef; + + // layout optimization + item.graph = graph_def; + tensorflow::grappler::LayoutOptimizer optimizer; + tensorflow::grappler::Cluster* gCluster; + + // virtual cluster + tensorflow::DeviceProperties device_properties; + device_properties.set_type("GPU"); + device_properties.mutable_environment()->insert({"architecture", "6"}); + gCluster = + new tensorflow::grappler::VirtualCluster({{"/GPU:0", device_properties}}); + + // single machine + int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores(); + int num_gpus = tensorflow::grappler::GetNumAvailableGPUs(); + LOG(DEBUG) << "cpu_cores: " << num_cpu_cores; + LOG(DEBUG) << "gpus: " << num_gpus; + // int timeout_s = 60 * 10; + // gCluster = new tensorflow::grappler::SingleMachine( + // timeout_s, num_cpu_cores, num_gpus); + + tensorflow::Status status = optimizer.Optimize(gCluster, item, &gdef); + + if (status !=tensorflow::Status::OK()) + return 
status; + + // constant folding + item.graph = gdef; + tensorflow::grappler::ConstantFolding fold(nullptr); + status = fold.Optimize(nullptr, item, &gdef); + if (status !=tensorflow::Status::OK()) + return status; + ShapeMap shape_map; TF_RETURN_IF_ERROR( - tensorflow::trt::inferShapes(graph_def, output_names, shape_map)); + tensorflow::trt::inferShapes(gdef, output_names, shape_map)); std::stringstream oss; for (auto& n : shape_map) { // nodes oss << " Node= " << n.first << ", "; @@ -213,10 +261,10 @@ tensorflow::Status ConvertGraphDefToTensorRT( } // Build full graph tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), - graph_def.library()); + gdef.library()); tensorflow::Graph graph(flib); TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( - tensorflow::GraphConstructorOptions(), graph_def, &graph)); + tensorflow::GraphConstructorOptions(), gdef, &graph)); // Segment the graph into subgraphs that can be converted to TensorRT tensorrt::segment::SegmentOptions segment_options; @@ -227,7 +275,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( segment_options.minimum_segment_size = 2; tensorrt::segment::SegmentNodesVector segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( - graph_def, IsTensorRTCandidate, segment_options, &segments)); + gdef, IsTensorRTCandidate, segment_options, &segments)); if (segments.size() > 1) { // LOG(WARNING) << "Multiple TensorRT candidate subgraphs were found, " //<< "but only the first can be converted."; diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index a66afa8d05..354f0c8b42 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -48,25 +48,27 @@ def CreateInferenceGraph(input_graph_def, outputs,max_batch_size=1,max_workspace # output_graph_def_string = trt_convert( # input_graph_def_string,outputs, # max_batch_size,max_workspace_size, status) - g = tf.Graph() - 
with g.as_default(): - tf.import_graph_def(input_graph_def, name="") - rewriter_config = rewriter_config_pb2.RewriterConfig() - rewriter_config.optimizers.append('layout') - rewriter_config.optimizers.append('constfold') + # g = tf.Graph() + # with g.as_default(): + # tf.import_graph_def(input_graph_def, name="") + # rewriter_config = rewriter_config_pb2.RewriterConfig() + # rewriter_config.optimizers.append('layout') + # rewriter_config.optimizers.append('constfold') - # mark output nodes as fetch - train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) - for node_name in outputs: - out_node = g.get_operation_by_name(node_name) - for i in range(0,len(out_node.outputs)): - train_op.append(out_node.outputs[0]) + # # mark output nodes as fetch + # train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) + # for node_name in outputs: + # out_node = g.get_operation_by_name(node_name) + # for i in range(0,len(out_node.outputs)): + # train_op.append(out_node.outputs[0]) - # constant folding - mg = meta_graph.create_meta_graph_def(graph=g) - meta_graph.add_collection_def(mg, ops.GraphKeys.TRAIN_OP) - optimized_graph_def_str = \ - tf_optimizer.OptimizeGraph(rewriter_config, mg).SerializeToString() + # # constant folding + # mg = meta_graph.create_meta_graph_def(graph=g) + # meta_graph.add_collection_def(mg, ops.GraphKeys.TRAIN_OP) + # optimized_graph_def_str = \ + # tf_optimizer.OptimizeGraph(rewriter_config, mg).SerializeToString() + + optimized_graph_def_str = input_graph_def.SerializeToString() # TODO(sami): Fix this when we can return status from C++ library # There is a problem with the TF internal library setup that doesn't allow us to return a status object from C++. 
-- GitLab From da188d378bc6826a8f182b42aa8175a932a0c2f8 Mon Sep 17 00:00:00 2001 From: Jie Date: Tue, 23 Jan 2018 17:23:00 -0800 Subject: [PATCH 0008/2939] [UPDATE] Refactoring shape inference Removed shape refiner and applied shape inference through grappler/costs/graph_properties. Currently using static shape inference. --- tensorflow/contrib/tensorrt/BUILD | 3 +- .../contrib/tensorrt/convert/convert_graph.cc | 39 +++--- .../contrib/tensorrt/convert/convert_nodes.cc | 24 ++-- .../contrib/tensorrt/convert/convert_nodes.h | 5 +- .../contrib/tensorrt/convert/inferShapes.cc | 125 ------------------ .../contrib/tensorrt/convert/inferShapes.h | 39 ------ 6 files changed, 40 insertions(+), 195 deletions(-) delete mode 100644 tensorflow/contrib/tensorrt/convert/inferShapes.cc delete mode 100644 tensorflow/contrib/tensorrt/convert/inferShapes.h diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 1cb916e4c3..f92b60b03a 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -174,12 +174,10 @@ cc_library( "convert/convert_nodes.cc", "convert/convert_graph.cc", "segment/segment.cc", - "convert/inferShapes.cc", ], hdrs=[ "convert/convert_nodes.h", "convert/convert_graph.h", - "convert/inferShapes.h", "segment/segment.h", "segment/union_find.h", ], @@ -196,6 +194,7 @@ cc_library( "//tensorflow/core/grappler/optimizers:layout_optimizer", "//tensorflow/core/grappler/clusters:virtual_cluster", "//tensorflow/core/grappler:devices", + "//tensorflow/core/grappler/costs:graph_properties", #"//tensorflow/core/grappler/clusters:single_machine", ], ) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index c1948c8144..e90790716c 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -28,7 +28,6 @@ limitations under the License. 
#include "NvInfer.h" #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h" -#include "tensorflow/contrib/tensorrt/convert/inferShapes.h" #include "tensorflow/contrib/tensorrt/segment/segment.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/node_def.pb.h" @@ -49,6 +48,8 @@ limitations under the License. #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/grappler/costs/graph_properties.h" + //------------------------------------------------------------------------------ namespace tensorrt { namespace convert { @@ -123,7 +124,8 @@ std::unordered_map> BuildTensorNameMap( tensorflow::Status ConvertSubGraphToTensorRT( tensorflow::Graph& graph, const std::vector& output_names, const std::set& subgraph_node_ids, size_t max_batch_size, - size_t max_workspace_size, const ShapeMap& shape_map) { + size_t max_workspace_size, + const tensorflow::grappler::GraphProperties& graph_properties) { tensorflow::EdgeSet subgraph_incoming_edges; GetSubGraphIncomingEdges(graph, subgraph_node_ids, &subgraph_incoming_edges); @@ -161,7 +163,7 @@ tensorflow::Status ConvertSubGraphToTensorRT( tensorflow::NodeDef trt_node_def; TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef( graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs, - max_batch_size, max_workspace_size, shape_map, &trt_node_def)); + max_batch_size, max_workspace_size, graph_properties, &trt_node_def)); tensorflow::Status status; tensorflow::Node* trt_node = graph.AddNode(trt_node_def, &status); @@ -246,19 +248,24 @@ tensorflow::Status ConvertGraphDefToTensorRT( status = fold.Optimize(nullptr, item, &gdef); if (status !=tensorflow::Status::OK()) return status; + + // AJ refactoring shape inference through grappler/GraphProperties. 
+ tensorflow::grappler::GraphProperties static_graph_properties(item); + static_graph_properties.InferStatically(false); + // TF_CHECK_OK(static_graph_prop.InferStatically(false)); + // ShapeMap shape_map; + // TF_RETURN_IF_ERROR( + // tensorflow::trt::inferShapes(gdef, output_names, shape_map)); + // std::stringstream oss; + // for (auto& n : shape_map) { // nodes + // oss << " Node= " << n.first << ", "; + // for (auto o : n.second) { // outputs + // oss << o.first.DebugString() << " T= " << o.second << ", "; + // } + // LOG(DEBUG) << oss.str(); + // oss.str(""); + // } - ShapeMap shape_map; - TF_RETURN_IF_ERROR( - tensorflow::trt::inferShapes(gdef, output_names, shape_map)); - std::stringstream oss; - for (auto& n : shape_map) { // nodes - oss << " Node= " << n.first << ", "; - for (auto o : n.second) { // outputs - oss << o.first.DebugString() << " T= " << o.second << ", "; - } - LOG(DEBUG) << oss.str(); - oss.str(""); - } // Build full graph tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), gdef.library()); @@ -291,7 +298,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( } TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT( graph, output_names, subgraph_node_ids, max_batch_size, - max_workspace_size, shape_map)); + max_workspace_size, static_graph_properties)); } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 83f78d7eff..6c77cdc0b6 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -1548,7 +1548,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( const tensorflow::Graph& graph, const std::set& subgraph_node_ids, const std::vector>& input_inds, const std::vector>& output_inds, size_t max_batch_size, - size_t max_workspace_size, const ShapeMap& shape_map, + size_t max_workspace_size, + const 
tensorflow::grappler::GraphProperties& graph_properties, tensorflow::NodeDef* trt_node) { // Visit nodes in reverse topological order and construct the TRT network. @@ -1605,20 +1606,20 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( input_names.push_back(node_name); // insert original node name without port // TODO(jie): alternative :) // tensorflow::DataType tf_dtype = node->output_type(output_idx); - if (shape_map.count(node_name) == 0) + if (!graph_properties.HasOutputProperties(node_name)) return tensorflow::errors::Internal("failed to find input node: " + node_name); - auto input_entry_vec = shape_map.at(node_name); - if (static_cast(input_entry_vec.size()) < output_idx) + auto op_info_vec = graph_properties.GetOutputProperties(node_name); + if (static_cast(op_info_vec.size()) < output_idx) return tensorflow::errors::Internal( "accessing output index of: " + std::to_string(output_idx) + ", at node: " + node_name + "with output entry from shape_map: " + - std::to_string(input_entry_vec.size())); + std::to_string(op_info_vec.size())); - auto input_entry = input_entry_vec.at(output_idx); + auto op_info = op_info_vec.at(output_idx); - tensorflow::DataType tf_dtype = input_entry.second; + tensorflow::DataType tf_dtype = op_info.dtype(); input_dtypes.push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); @@ -1627,15 +1628,16 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( LOG(DEBUG) << "accessing output index of: " << std::to_string(output_idx) << ", at node: " << node_name << "with output entry from shape_map: " - << std::to_string(input_entry_vec.size()); + << std::to_string(op_info_vec.size()); + // TODO(ben,jie): update TRT input format/dimension nvinfer1::DimsCHW input_dim_psuedo_chw; for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; - for (int i = 1; i < input_entry.first.dims(); i++) { + for (int i = 1; i < op_info.shape().dim_size(); i++) { LOG(DEBUG) << "dimension: " << i - << " , size: " << 
input_entry.first.dim_size(i); - input_dim_psuedo_chw.d[i - 1] = input_entry.first.dim_size(i); + << " , size: " << op_info.shape().dim(i).size(); + input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); } // TODO(ben,jie): proper way to restore input tensor name? diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index a624582dec..dc59c37892 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -20,10 +20,10 @@ limitations under the License. #include #include -#include "tensorflow/contrib/tensorrt/convert/inferShapes.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/grappler/costs/graph_properties.h" namespace tensorrt { namespace convert { @@ -34,7 +34,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( input_inds, // {node_id, output_idx} const std::vector>& output_inds, // {node_id, output_idx} - size_t max_batch_size, size_t max_workspace_size, const ShapeMap& shape_map, + size_t max_batch_size, size_t max_workspace_size, + const tensorflow::grappler::GraphProperties& graph_prop, tensorflow::NodeDef* trt_node); } // namespace convert } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/convert/inferShapes.cc b/tensorflow/contrib/tensorrt/convert/inferShapes.cc deleted file mode 100644 index c7f0f0023d..0000000000 --- a/tensorflow/contrib/tensorrt/convert/inferShapes.cc +++ /dev/null @@ -1,125 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/contrib/tensorrt/convert/inferShapes.h" -#include -#include "tensorflow/core/common_runtime/shape_refiner.h" -#include "tensorflow/core/framework/node_def.pb.h" -#include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/types.pb_text.h" -#include "tensorflow/core/graph/algorithm.h" -#include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/graph/graph_constructor.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/platform/logging.h" - -#define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1) - -namespace tensorflow { -namespace trt { -std::vector getTypes(const tensorflow::OpDef& op, - const tensorflow::NodeDef& nd, - bool inp = true) { - const auto& attrMap = nd.attr(); - auto getType = [&attrMap](decltype( - op.input_arg(0)) a) -> std::vector { - std::vector tvec; - if (!a.type_list_attr().empty()) { // get the list types - const auto& tl = attrMap.at(a.type_list_attr()).list(); - int tsize = tl.type_size(); - tvec.reserve(tsize); - for (int t = 0; t < tsize; t++) { - tvec.push_back(tl.type(t)); - } - return tvec; - } - tensorflow::DataType cType = tensorflow::DT_INVALID; - if (a.type() != tensorflow::DT_INVALID) { // get defined types - cType = a.type(); - } else if (!a.type_attr().empty()) { - cType = attrMap.at(a.type_attr()).type(); - } - if (!a.number_attr().empty()) { // numbertypes 
- int64 nTensors = attrMap.at(a.number_attr()).i(); - tvec = std::vector(nTensors, cType); - return tvec; - } - tvec.push_back(cType); - return tvec; - }; - std::vector types; - if (inp) { - int n_inputs = op.input_arg_size(); - for (int i = 0; i < n_inputs; i++) { - auto tout = getType(op.input_arg(i)); - LOG(DEBUG) << "Node= " << nd.name() << " #inputs" << tout.size(); - types.insert(types.end(), tout.begin(), tout.end()); - } - } else { - int n_outputs = op.output_arg_size(); - // types.resize(n_outputs); - for (int i = 0; i < n_outputs; i++) { - auto tout = getType(op.output_arg(i)); - LOG(DEBUG) << "Node= " << nd.name() << " #outputs" << tout.size(); - types.insert(types.end(), tout.begin(), tout.end()); - } - } - return types; -} - -tensorflow::Status inferShapes(const tensorflow::GraphDef& graph_def, - const std::vector& output_names, - ShapeMap& shapes) { - tensorflow::Graph g(OpRegistry::Global()); - TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( - tensorflow::GraphConstructorOptions(), graph_def, &g)); - std::vector POnodes; - tensorflow::GetPostOrder(g, &POnodes); - tensorflow::ShapeRefiner refiner(graph_def.versions().producer(), - OpRegistry::Global()); - for (auto n = POnodes.rbegin(); n != POnodes.rend(); ++n) { - TF_CHECK_OK(refiner.AddNode(*n)); - } - - auto shape2PTS = [](tensorflow::shape_inference::InferenceContext* ic, - const tensorflow::shape_inference::ShapeHandle& sh) - -> tensorflow::PartialTensorShape { - std::vector dims; - int64 rank = ic->Rank(sh); - for (int64 i = 0; i < rank; i++) { - auto dh = ic->Dim(sh, i); - dims.push_back(ic->Value(dh)); - } - return tensorflow::PartialTensorShape(dims); - }; - for (const auto& n : POnodes) { - auto ic = refiner.GetContext(n); - if (ic) { - int nOuts = ic->num_outputs(); - auto types = getTypes(n->op_def(), n->def(), false); - std::vector< - std::pair> - SAT; - for (int i = 0; i < nOuts; i++) { - auto PTS = shape2PTS(ic, ic->output(i)); - SAT.push_back({PTS, types.at(i)}); - } - 
shapes[n->name()] = SAT; - } else { - LOG(WARNING) << "Node " << n->name() << " doesn't have InferenceContext!"; - } - } - return tensorflow::Status::OK(); -} -} // namespace trt -} // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/convert/inferShapes.h b/tensorflow/contrib/tensorrt/convert/inferShapes.h deleted file mode 100644 index b94f1ee893..0000000000 --- a/tensorflow/contrib/tensorrt/convert/inferShapes.h +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_TENSORRT_CONVERT_INFERSHAPES_H_ -#define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_INFERSHAPES_H_ - -#include -#include -#include -#include - -#include "tensorflow/core/framework/graph.pb.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/lib/core/status.h" - -typedef std::unordered_map>> - ShapeMap; -namespace tensorflow { -namespace trt { -tensorflow::Status inferShapes(const tensorflow::GraphDef& graph_def, - const std::vector& output_names, - ShapeMap& shapes); -} -} // namespace tensorflow - -#endif // TENSORFLOW_CONTRIB_TENSORRT_CONVERT_INFERSHAPES_H_ -- GitLab From ccb555f1e7947785763cf65a6713634a85c72607 Mon Sep 17 00:00:00 2001 From: Jie Date: Wed, 24 Jan 2018 16:32:02 -0800 Subject: [PATCH 0009/2939] [BUG_FIX] 'Mean' converter ConvertReduce fixed 1. permutation index 2. 
output tensor pushed back into map --- tensorflow/contrib/tensorrt/convert/convert_graph.cc | 2 +- tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index e90790716c..16d6e6ec7d 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -60,7 +60,7 @@ static std::unordered_set output_nodes; bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { static const std::set candidate_ops = { "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", - "Add", "Mul", "Sub", "Rsqrt", "Pad" // "Placeholder" ,"Mean" + "Add", "Mul", "Sub", "Rsqrt", "Pad" , "Mean" // TODO(ben,jie): ... }; if (output_nodes.count(node_def.name())) return false; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 6c77cdc0b6..6a93edfb47 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -1334,7 +1334,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, int nbDims = dims.nbDims + 1; TRT_ShapedWeights index_list = inputs.at(1).weights(); - + TFAttrs attrs(node_def); // TODO(jie): handle data type // auto data_type = attrs.get("T"); @@ -1372,7 +1372,9 @@ tensorflow::Status ConvertReduce(Converter& ctx, if (index_list_data[i] == 0) return tensorflow::errors::InvalidArgument("TRT cannot reduce at 0, at" + node_def.name()); - if (index_list_data[i] == 1) permuted_index = 1; + if (index_list_data[i] == 1) + permuted_index = 1; + idx_set.emplace(index_list_data[i]); } @@ -1380,7 +1382,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, nvinfer1::DimsHW pool_kernel; if (permuted_index == 1) { for (int i = 2; i < nbDims; i++) { - if (idx_set.count(i)) { + if (idx_set.count(i)==0) { permuted_index 
= i; break; } @@ -1415,6 +1417,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, output_tensor = ctx.transposeTensor( const_cast(output_tensor), permutation_order); } + outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); } -- GitLab From e1eb01e5edf1b5814d7f50e8bcdf910c02a49256 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Wed, 24 Jan 2018 19:29:22 -0800 Subject: [PATCH 0010/2939] Adding Resources for calibration and execution --- tensorflow/contrib/tensorrt/BUILD | 21 ++++++ .../contrib/tensorrt/convert/convert_nodes.cc | 1 + .../tensorrt/resources/TRTInt8Calibrator.cc | 65 +++++++++++++++++++ .../tensorrt/resources/TRTInt8Calibrator.h | 40 ++++++++++++ .../tensorrt/resources/TRTResourceManager.cc | 18 +++++ .../tensorrt/resources/TRTResourceManager.h | 37 +++++++++++ .../contrib/tensorrt/resources/TRTResources.h | 32 +++++++++ 7 files changed, 214 insertions(+) create mode 100644 tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc create mode 100644 tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h create mode 100644 tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc create mode 100644 tensorflow/contrib/tensorrt/resources/TRTResourceManager.h create mode 100644 tensorflow/contrib/tensorrt/resources/TRTResources.h diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 1cb916e4c3..37aa573cdb 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -168,6 +168,26 @@ tf_py_wrap_cc( ], ) +cc_library( + name = "trt_resources", + srcs = [ + "resources/TRTInt8Calibrator.cc", + "resources/TRTResourceManager.cc", + ], + hdrs = [ + "resources/TRTInt8Calibrator.h", + "resources/TRTResourceManager.h", + "resources/TRTResources.h", + ], + deps = [ + "@local_config_tensorrt//:tensorrt", + "//tensorflow/core:framework_headers_lib", + "//tensorflow/core:framework_lite", + "//tensorflow/core:core_cpu_base", + + ], +) + cc_library( name= 
"trt_conversion", srcs=[ @@ -188,6 +208,7 @@ cc_library( "@protobuf_archive//:protobuf_headers", "@nsync//:nsync_headers", ":trt_logging", + ":trt_resources", "//tensorflow/core:framework_lite", "//tensorflow/core:protos_all_cc", "//tensorflow/core:framework_headers_lib", diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 83f78d7eff..3684ac8e78 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -39,6 +39,7 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/contrib/tensorrt/resources/TRTResourceManager.h" #define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1) // Check if the types are equal. Cast to int first so that failure log message diff --git a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc new file mode 100644 index 0000000000..3c94b52ea6 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc @@ -0,0 +1,65 @@ +// +// Created by skama on 1/24/18. 
+// + +#include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" + +#include +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { +namespace trt { + +int TRTInt8Calibrator::getBatchSize() const { return batch_size_; } + +bool TRTInt8Calibrator::setBatch( + const std::unordered_map& data) { + while (calib_running_.load( + std::memory_order_acquire)) { // wait while calibration is running + tensorflow::mutex_lock l(cond_mtx_); + cond_.wait_for(l, std::chrono::milliseconds(50)); + } + for (const auto it : data) { + auto devptr = dev_buffers_.find(it.first); + if (devptr == dev_buffers_.end()) { + LOG(FATAL) << "FATAL input name '" << it.first + << "' does not match with the buffer names"; + } + const auto& d = devptr->second; + auto status = + cudaMemcpy(d.first, it.second, d.second, cudaMemcpyHostToDevice); + if (status != 0) { + LOG(FATAL) << "cudaMemcpy for '" << it.first << "' failed with " + << status; + } + } + calib_running_.store(true, std::memory_order_release); // release builder + cond_.notify_all(); + return true; +} + +bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, + int nbBindings) { + calib_running_.store(false, std::memory_order_release); // wait for new batch + cond_.notify_all(); + while (!calib_running_.load( + std::memory_order_acquire)) { // wait until new batch arrives + tensorflow::mutex_lock l(cond_mtx_); + cond_.wait_for(l, std::chrono::milliseconds(50)); + } + if (done_) { + return false; + } + for (int i = 0; i < nbBindings; i++) { + auto it = dev_buffers_.find(names[i]); + if (it == dev_buffers_.end()) { + LOG(FATAL) << "Calibration engine asked for unknown tensor name '" + << names[i] << "' at position " << i; + } + bindings[i] = it->second.first; + } + return true; +} + +} // namespace trt +} // namespace tensorflow \ No newline at end of file diff --git a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h new file mode 
100644 index 0000000000..b0e904b666 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h @@ -0,0 +1,40 @@ +// +// Created by skama on 1/24/18. +// + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTINT8CALIBRATOR_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTINT8CALIBRATOR_H_ + +#include +#include +#include +#include +#include +#include "tensorflow/core/platform/mutex.h" +namespace tensorflow { +namespace trt { + +struct TRTInt8Calibrator : public nvinfer1::IInt8Calibrator { + public: + TRTInt8Calibrator(const std::unordered_map< + std::string, std::pair>& dev_buffers, + int batch_size) + : batch_size_(batch_size), + done_(false), + dev_buffers_(dev_buffers), + calib_running_(false){}; + int getBatchSize() const; + bool getBatch(void* bindings[], const char* names[], int nbBindings) override; + bool setBatch(const std::unordered_map &data); + void setDone(){done_=true;} + private: + int batch_size_; + tensorflow::mutex cond_mtx_; + tensorflow::condition_variable cond_; + bool done_; + std::unordered_map> dev_buffers_; + std::atomic_bool calib_running_; +}; +} // namespace trt +} // namespace tensorflow +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTINT8CALIBRATOR_H_ diff --git a/tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc b/tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc new file mode 100644 index 0000000000..b060295301 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc @@ -0,0 +1,18 @@ +// +// Created by skama on 1/23/18. +// + +#include "tensorflow/contrib/tensorrt/resources/TRTResourceManager.h" + + +std::shared_ptr tensorflow::trt::TRTResourceManager::getManager(const std::string &mgr_name) { + // mutex is held for lookup only. Most instantiations where mutex will be held longer + // will be during op creation and should be ok. 
+ tensorflow::mutex_lock lock(map_mutex_); + auto s=managers_.find(mgr_name); + if(s==managers_.end()){ + auto it=managers_.emplace(mgr_name,std::make_shared(mgr_name)); + return it.first->second; + } + return s->second; +} diff --git a/tensorflow/contrib/tensorrt/resources/TRTResourceManager.h b/tensorflow/contrib/tensorrt/resources/TRTResourceManager.h new file mode 100644 index 0000000000..5ec66ab582 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/TRTResourceManager.h @@ -0,0 +1,37 @@ +// +// Created by skama on 1/23/18. +// + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCEMANAGER_H_ + +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCE_TRTRESOURCEMANAGER_H_ +#include + +#include +#include +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { +namespace trt { +class TRTResourceManager { + TRTResourceManager() = default; + + public: + static std::shared_ptr instance() { + static std::shared_ptr instance_( + new TRTResourceManager); + return instance_; + } + // returns a manager for the given op; if it doesn't exist, it creates one + std::shared_ptr getManager( + const std::string& op_name); + + private: + std::unordered_map> + managers_; + tensorflow::mutex map_mutex_; +}; +} // namespace trt +} // namespace tensorflow +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCEMANAGER_H_ diff --git a/tensorflow/contrib/tensorrt/resources/TRTResources.h b/tensorflow/contrib/tensorrt/resources/TRTResources.h new file mode 100644 index 0000000000..2b65017943 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/TRTResources.h @@ -0,0 +1,32 @@ +// +// Created by skama on 1/23/18. 
+// + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ + +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ + +#include +#include +#include "tensorflow/contrib/tensorrt/resourcemgr/TRTInt8Calibrator.h" +#include "tensorflow/core/framework/resource_mgr.h" + +namespace tensorflow { +namespace trt { + +struct TRTCalibrationResource : public tensorflow::ResourceBase { + TRTCalibrationResource():calibrator(nullptr), builder(nullptr), thr(nullptr){}; + TRTInt8Calibrator* calibrator; + nvinfer1::IBuilder* builder; + std::thread *thr; +}; + +struct TRTEngineResource:public tensorflow::ResourceBase{ + TRTEngineResource():runtime(nullptr), ctx(nullptr){}; + nvinfer1::IRuntime *runtime; + nvinfer1::IExecutionContext *ctx; +}; + +} +} +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ -- GitLab From 6ea7a24c615e7cd9445395539a37e67cb74eede2 Mon Sep 17 00:00:00 2001 From: Jie Date: Thu, 25 Jan 2018 15:14:50 -0800 Subject: [PATCH 0011/2939] [UPDATE] Converter update ConcatV2 AvgPool inception_v1 passed --- .../contrib/tensorrt/convert/convert_graph.cc | 3 +- .../contrib/tensorrt/convert/convert_nodes.cc | 122 +++++++++++++++++- 2 files changed, 123 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 16d6e6ec7d..2b6a26491b 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -60,7 +60,8 @@ static std::unordered_set output_nodes; bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { static const std::set candidate_ops = { "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", - "Add", "Mul", "Sub", "Rsqrt", "Pad" , "Mean" + "Add", "Mul", "Sub", "Rsqrt", "Pad" , "Mean", + "AvgPool", "ConcatV2" // TODO(ben,jie): ... 
}; if (output_nodes.count(node_def.name())) return false; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 86c43d960a..ff2e37b7da 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -1093,6 +1093,8 @@ tensorflow::Status ConvertPool(Converter& ctx, // TODO(jie): support other pooling type if (node_def.op() == "MaxPool") type = nvinfer1::PoolingType::kMAX; + else if (node_def.op() == "AvgPool") + type = nvinfer1::PoolingType::kAVERAGE; else return tensorflow::errors::Unimplemented("only supports Max pool"); @@ -1253,6 +1255,25 @@ tensorflow::Status ConvertConst(Converter& ctx, // weights = ctx.get_temp_weights(dtype, scalar_shape); // std::memcpy(const_cast(weights.values), // weights_tensor.float_val().data(), weights.size_bytes()); + } else if (!weights_tensor.int_val().empty()) { + LOG(DEBUG) << "int!!!" << node_def.name(); + nvinfer1::Dims scalar_shape; + if (tensor.dims() > 0) { + LOG(DEBUG) << "dimensions: " << tensor.dims(); + weights = TRT_ShapedWeights(dtype, weights_tensor.int_val().data(), + get_tensor_shape(tensor)); + } else { + LOG(DEBUG) << "dimensions: " << tensor.dims(); + scalar_shape.nbDims = 1; + scalar_shape.d[0] = 1; + scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; + for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { + scalar_shape.d[i] = 0; + scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; + } + weights = TRT_ShapedWeights(dtype, weights_tensor.int_val().data(), + scalar_shape); + } } else if (!weights_tensor.tensor_content().empty()) { LOG(DEBUG) << "TENSOR!!!" 
<< node_def.name(); weights = TRT_ShapedWeights(dtype, weights_tensor.tensor_content().data(), @@ -1261,6 +1282,7 @@ tensorflow::Status ConvertConst(Converter& ctx, return tensorflow::errors::Unimplemented( "not supported constant type, at " + node_def.name()); } + // pass the output outputs->push_back(TRT_TensorOrWeights(weights)); return tensorflow::Status::OK(); @@ -1522,19 +1544,115 @@ tensorflow::Status ConvertPad(Converter& ctx, return tensorflow::Status::OK(); } +tensorflow::Status ConvertConcat( + Converter& ctx, tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { + + // not including the last input (axis) here + int input_size = static_cast(inputs.size()) - 1; + + if (!inputs.at(0).is_tensor()) + return tensorflow::errors::InvalidArgument( + "Concat in TRT support only Tensor input, at " + node_def.name()); + + // We are retrieving the axis + TRT_ShapedWeights axis = inputs.at(input_size).weights(); + + TFAttrs attrs(node_def); + auto attr_size = attrs.at("N")->i(); + auto data_type = attrs.get("T"); + auto index_type = attrs.get("Tidx"); + + // TODO(jie): handle data type + // Only expect to handle INT32 as index attributes for now + if (index_type != tensorflow::DataType::DT_INT32) + return tensorflow::errors::Unimplemented("Tidx supports only DT_INT32, at " + + node_def.name()); + + int index = + *(static_cast(const_cast(axis.values_))); + + // TODO(jie): early termination with no-op (attr_size==1) + + auto dim = inputs.at(0).tensor()->getDimensions(); + // dimension check + if (index > dim.nbDims + 1) + return tensorflow::errors::InvalidArgument( + "Concatenate on axis out of dimension range, at " + + node_def.name()); + + if (index == 0) + return tensorflow::errors::InvalidArgument( + "Concatenate on batch dimension not supported, at " + + node_def.name()); + + // in case we need permutation; + std::vector permutation_order(dim.nbDims+1); + + for (int i=0; i inputs_vec; + // Shape check (all input tensors should 
have same shape) + // starting from 0 since we are probably also doing transpose here; + for (int i=0; i < input_size; i++) { + auto tensor_i = inputs.at(i).tensor(); + auto dim_i = tensor_i->getDimensions(); + if ( dim_i.nbDims != dim.nbDims ) + return tensorflow::errors::InvalidArgument( + "Concatenate receives inputs with inconsistent dimensions, at " + + node_def.name()); + + for (int j=0; j < dim.nbDims; j++) { + // check dimension consistency on non-concatenate axis + if (j != index-1 && dim_i.d[j] != dim.d[j]) + return tensorflow::errors::InvalidArgument( + "Concatenate receives inputs with inconsistent shape, at" + + node_def.name()); + } + + // TRT does concatenation only on channel! + if (index != 1) + tensor_i = ctx.transposeTensor(const_cast(tensor_i), + permutation_order); + + inputs_vec.push_back(tensor_i); + } + + // nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + nvinfer1::IConcatenationLayer* layer = ctx.network()->addConcatenation( + const_cast(inputs_vec.data()), + inputs_vec.size()); + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + + if (index != 1) + { + output_tensor= ctx.transposeTensor(output_tensor, permutation_order); + } + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + void Converter::register_op_converters() { // vgg_16 slim implementation _op_registry["Placeholder"] = ConvertPlaceholder; _op_registry["Conv2D"] = ConvertConv2D; _op_registry["Relu"] = ConvertActivation; _op_registry["MaxPool"] = ConvertPool; + _op_registry["AvgPool"] = ConvertPool; // This could be really handled as ConvertBinary _op_registry["BiasAdd"] = ConvertScale; _op_registry["Const"] = ConvertConst; // _op_registry["MatMul"] = ConvertFullyConnected; // not used in vgg // TODO(ben,jie): this is a temp hack. 
_op_registry["Identity"] = ConvertIdentity; // Identity should be removed - // _op_registry["AvgPool"] = ConvertPool; // resnet_50_v1 slim implementation _op_registry["Add"] = ConvertBinary; @@ -1544,6 +1662,8 @@ void Converter::register_op_converters() { _op_registry["Mean"] = ConvertReduce; _op_registry["Pad"] = ConvertPad; // TODO(ben,jie): Add more ops + + _op_registry["ConcatV2"] = ConvertConcat; } } // namespace -- GitLab From cf30a7549e026d5c50117ae011af2b0148a81a89 Mon Sep 17 00:00:00 2001 From: Jie Date: Thu, 25 Jan 2018 17:21:07 -0800 Subject: [PATCH 0012/2939] [UPDATE] Converter update Grouped convolution support added (depthwise as a special case) inception_v2 passed --- .../contrib/tensorrt/convert/convert_graph.cc | 2 +- .../contrib/tensorrt/convert/convert_nodes.cc | 220 +++++++++++------- 2 files changed, 140 insertions(+), 82 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 2b6a26491b..c7fa4144b1 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -61,7 +61,7 @@ bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { static const std::set candidate_ops = { "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", "Add", "Mul", "Sub", "Rsqrt", "Pad" , "Mean", - "AvgPool", "ConcatV2" + "AvgPool", "ConcatV2", "DepthwiseConv2dNative" // TODO(ben,jie): ... 
}; if (output_nodes.count(node_def.name())) return false; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index ff2e37b7da..ff47cdfe4a 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -366,15 +366,20 @@ void reorder4(nvinfer1::DimsNCHW shape, T const* idata, } void reorder_rsck_to_kcrs(TRT_ShapedWeights const& iweights, - TRT_ShapedWeights* oweights) { + TRT_ShapedWeights* oweights, int nbGroups) { CHECK_EQ(iweights.type_, oweights->type_); CHECK_EQ(iweights.size_bytes(), oweights->size_bytes()); int r = iweights.shape_.d[0]; int s = iweights.shape_.d[1]; - int c = iweights.shape_.d[2]; - int k = iweights.shape_.d[3]; - oweights->shape_.d[0] = k; - oweights->shape_.d[1] = c; + // TRT requires GKcRS, while TF depthwise has RSCK + // where c=1, C=G + LOG(DEBUG) << "nbGroups: " << nbGroups; + int c = iweights.shape_.d[2]/nbGroups; + LOG(DEBUG) << "c" << iweights.shape_.d[2] << " then " << c; + int k = iweights.shape_.d[3]*nbGroups; + LOG(DEBUG) << "k" << iweights.shape_.d[3] << " then " << k; + oweights->shape_.d[0] = k/nbGroups; + oweights->shape_.d[1] = c*nbGroups; oweights->shape_.d[2] = r; oweights->shape_.d[3] = s; // nvinfer1::DimsNCHW istrides = {1, s, c*r*s, r*s}; @@ -911,87 +916,23 @@ tensorflow::Status BinaryTensorOpWeight( return tensorflow::Status::OK(); } -tensorflow::Status BinaryTensorOpTensor( - Converter& ctx, tensorflow::NodeDef const& node_def, - const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, - std::vector* outputs) { - static const std::unordered_map - ops{ - {"Add", nvinfer1::ElementWiseOperation::kSUM}, - {"Mul", nvinfer1::ElementWiseOperation::kPROD}, - // {"max", nvinfer1::ElementWiseOperation::kMAX}, - // {"min", nvinfer1::ElementWiseOperation::kMIN}, - {"Sub", nvinfer1::ElementWiseOperation::kSUB}, - {"Div", nvinfer1::ElementWiseOperation::kDIV}, - }; - - // FIXME assume 
type matches input weights - // get trt type & shape - TFAttrs attrs(node_def); - // maybe this part has to be moved into the block of rsqrt later - nvinfer1::DataType dtype = attrs.get("T"); - - // check type consistency - CHECK_EQ_TYPE(tensor_l->getType(), dtype); - CHECK_EQ_TYPE(tensor_r->getType(), dtype); - auto op_pair = ops.find(node_def.op()); - if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented( - "binary op: " + node_def.op() + - " not supported at: " + node_def.name()); - - nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( - *const_cast(tensor_l), - *const_cast(tensor_r), op_pair->second); - - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - - // pass the output - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertPlaceholder( - Converter& ctx, tensorflow::NodeDef const& node_def, - std::vector const& inputs, - std::vector* outputs) { - LOG(DEBUG) << "Placeholder should have been replace already"; - return tensorflow::errors::Unimplemented("cannot convert Placeholder op"); - // OK this make sense since we are supposed to replace it with input - TFAttrs attrs(node_def); - nvinfer1::DataType dtype = attrs.get("dtype"); - nvinfer1::Dims dims = attrs.get("shape"); - - dims.nbDims--; - for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1]; - - nvinfer1::ITensor* output = - ctx.network()->addInput(node_def.name().c_str(), dtype, dims); - if (!output) { - return tensorflow::errors::InvalidArgument("Failed to create Input layer"); - } - outputs->push_back(TRT_TensorOrWeights(output)); - return tensorflow::Status::OK(); -} +enum class ConvolutionType { + DEFAULT, + DEPTHWISE_CONV +}; -tensorflow::Status ConvertConv2D(Converter& ctx, +tensorflow::Status ConvertConv2DHelper( + Converter& ctx, tensorflow::NodeDef const& node_def, std::vector const& inputs, - std::vector* outputs) { + std::vector* outputs, + int group // group ==0 specifies 
depthwise conv + ) { nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); - // nvinfer1::ITensor* tensor = inputs.at(0).tensor(); - // TODO(jie): handle NHWC/NCHW transpose; - TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); - TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); - reorder_rsck_to_kcrs(weights_rsck, &weights); - TRT_ShapedWeights biases(weights.type_); - int noutput = weights.shape_.d[0]; - nvinfer1::DimsHW kernel_size; - kernel_size.h() = weights.shape_.d[2]; - kernel_size.w() = weights.shape_.d[3]; - LOG(DEBUG) << "kernel size: " << kernel_size.h() << ", " << kernel_size.w(); + TFAttrs attrs(node_def); + int c_index = 1; int h_index = 2; int w_index = 3; auto data_format = attrs.get("data_format"); @@ -1000,17 +941,36 @@ tensorflow::Status ConvertConv2D(Converter& ctx, {0, 3, 1, 2}); h_index = 1; w_index = 2; + c_index = 3; // TODO(jie): transpose it } else { LOG(DEBUG) << "NCHW !!!!"; } + + // tensor after transpose (NCHW) + auto tensor_dim = tensor->getDimensions(); + + int nbGroups = group; + if (nbGroups == 0) // depthwise convolution + nbGroups = tensor_dim.d[0]; + LOG(DEBUG) << "groups count: " << nbGroups; + + TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); + TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); + reorder_rsck_to_kcrs(weights_rsck, &weights, nbGroups); + TRT_ShapedWeights biases(weights.type_); + int noutput = weights.shape_.d[0] * nbGroups; + nvinfer1::DimsHW kernel_size; + kernel_size.h() = weights.shape_.d[2]; + kernel_size.w() = weights.shape_.d[3]; + LOG(DEBUG) << "kernel size: " << kernel_size.h() << ", " << kernel_size.w(); + // TODO(jie): stride. 
(NHWC/NCHW) auto tf_stride = attrs.get>("strides"); LOG(DEBUG) << "h_INDEX" << h_index << ", w_index " << w_index; LOG(DEBUG) << "stride!!!: " << tf_stride[0] << tf_stride[1] << tf_stride[2] << tf_stride[3]; nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]); - auto tensor_dim = tensor->getDimensions(); std::vector> padding; // TODO(jie): padding. if (attrs.get("padding") == "SAME") { @@ -1055,6 +1015,7 @@ tensorflow::Status ConvertConv2D(Converter& ctx, layer->setStride(stride); layer->setPadding({padding[0].first, padding[1].first}); layer->setName(node_def.name().c_str()); + layer->setNbGroups(nbGroups); nvinfer1::ITensor* output_tensor = layer->getOutput(0); auto dim_after = output_tensor->getDimensions(); @@ -1071,6 +1032,102 @@ tensorflow::Status ConvertConv2D(Converter& ctx, return tensorflow::Status::OK(); } +tensorflow::Status ConvertConv2DHelper( + Converter& ctx, + tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs, + ConvolutionType type) { + switch(type) { + case ConvolutionType::DEFAULT: + return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 1); + case ConvolutionType::DEPTHWISE_CONV: + return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 0); + } + return tensorflow::errors::Unimplemented( + "unsupported convolution type at, " + node_def.name()); +} + +tensorflow::Status BinaryTensorOpTensor( + Converter& ctx, tensorflow::NodeDef const& node_def, + const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, + std::vector* outputs) { + static const std::unordered_map + ops{ + {"Add", nvinfer1::ElementWiseOperation::kSUM}, + {"Mul", nvinfer1::ElementWiseOperation::kPROD}, + // {"max", nvinfer1::ElementWiseOperation::kMAX}, + // {"min", nvinfer1::ElementWiseOperation::kMIN}, + {"Sub", nvinfer1::ElementWiseOperation::kSUB}, + {"Div", nvinfer1::ElementWiseOperation::kDIV}, + }; + + // FIXME assume type matches input weights + // get trt type & shape + TFAttrs attrs(node_def); + // 
maybe this part has to be moved into the block of rsqrt later + nvinfer1::DataType dtype = attrs.get("T"); + + // check type consistency + CHECK_EQ_TYPE(tensor_l->getType(), dtype); + CHECK_EQ_TYPE(tensor_r->getType(), dtype); + auto op_pair = ops.find(node_def.op()); + if (op_pair == ops.end()) + return tensorflow::errors::Unimplemented( + "binary op: " + node_def.op() + + " not supported at: " + node_def.name()); + + nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( + *const_cast(tensor_l), + *const_cast(tensor_r), op_pair->second); + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + + // pass the output + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertPlaceholder( + Converter& ctx, tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { + LOG(DEBUG) << "Placeholder should have been replace already"; + return tensorflow::errors::Unimplemented("cannot convert Placeholder op"); + // OK this make sense since we are supposed to replace it with input + TFAttrs attrs(node_def); + nvinfer1::DataType dtype = attrs.get("dtype"); + nvinfer1::Dims dims = attrs.get("shape"); + + dims.nbDims--; + for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1]; + + nvinfer1::ITensor* output = + ctx.network()->addInput(node_def.name().c_str(), dtype, dims); + if (!output) { + return tensorflow::errors::InvalidArgument("Failed to create Input layer"); + } + outputs->push_back(TRT_TensorOrWeights(output)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertConv2D(Converter& ctx, + tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { + return ConvertConv2DHelper(ctx, node_def, inputs, outputs, + ConvolutionType::DEFAULT); +} + +tensorflow::Status ConvertConv2DDepthwise( + Converter& ctx, + tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { + return 
ConvertConv2DHelper(ctx, node_def, inputs, outputs, + ConvolutionType::DEPTHWISE_CONV); +} + tensorflow::Status ConvertPool(Converter& ctx, tensorflow::NodeDef const& node_def, std::vector const& inputs, @@ -1644,6 +1701,7 @@ void Converter::register_op_converters() { // vgg_16 slim implementation _op_registry["Placeholder"] = ConvertPlaceholder; _op_registry["Conv2D"] = ConvertConv2D; + _op_registry["DepthwiseConv2dNative"] = ConvertConv2DDepthwise; _op_registry["Relu"] = ConvertActivation; _op_registry["MaxPool"] = ConvertPool; _op_registry["AvgPool"] = ConvertPool; -- GitLab From df7c1e534f53c9c9173d07947a85531f69efb081 Mon Sep 17 00:00:00 2001 From: Thomas Deegan Date: Mon, 22 Jan 2018 21:31:47 -0800 Subject: [PATCH 0013/2939] add remove control deps transform --- tensorflow/tools/graph_transforms/BUILD | 1 + .../remove_control_dependencies.cc | 34 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 tensorflow/tools/graph_transforms/remove_control_dependencies.cc diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD index b5465b7fb3..de4821340e 100644 --- a/tensorflow/tools/graph_transforms/BUILD +++ b/tensorflow/tools/graph_transforms/BUILD @@ -102,6 +102,7 @@ cc_library( "remove_ema.cc", "obfuscate_names.cc", "remove_attribute.cc", + "remove_control_dependencies.cc", "remove_device.cc", "remove_nodes.cc", "rename_attribute.cc", diff --git a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc new file mode 100644 index 0000000000..a351e6812b --- /dev/null +++ b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc @@ -0,0 +1,34 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace tensorflow { +namespace graph_transforms { + +// Changes the op type of a specified op. 
+Status RemoveControlDependencies(const GraphDef& input_graph_def, + const TransformFuncContext& context, + GraphDef* output_graph_def) { + output_graph_def->Clear(); + for (const NodeDef& node : input_graph_def.node()) { + NodeDef* new_node = output_graph_def->mutable_node()->Add(); + *new_node = node; + new_node->clear_input(); + for (const auto& input : node.input()) { + if (input[0] != '^') { + new_node->add_input(input); + } + } + } + return Status::OK(); +} + +REGISTER_GRAPH_TRANSFORM("remove_control_dependencies", RemoveControlDependencies); + +} // namespace graph_transforms +} // namespace tensorflow -- GitLab From 6e505506524a10314c611b3f65127d847f69a1a0 Mon Sep 17 00:00:00 2001 From: Thomas Deegan Date: Tue, 23 Jan 2018 12:22:12 -0800 Subject: [PATCH 0014/2939] Add better docs --- .../tools/graph_transforms/remove_control_dependencies.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc index a351e6812b..d4c369f148 100644 --- a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc +++ b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc @@ -10,7 +10,10 @@ namespace tensorflow { namespace graph_transforms { -// Changes the op type of a specified op. +// Remove control depdencies in preparation for inference. +// In the tensorflow graph, control dependencies are represented as extra +// inputs which are referenced with "^tensor_name". +// See node_def.proto for more details. 
Status RemoveControlDependencies(const GraphDef& input_graph_def, const TransformFuncContext& context, GraphDef* output_graph_def) { -- GitLab From c2fb1d9ca46f1a2cb24452c20655ee1600fe3e41 Mon Sep 17 00:00:00 2001 From: Thomas Deegan Date: Tue, 23 Jan 2018 12:28:00 -0800 Subject: [PATCH 0015/2939] remove unnecessary includes --- .../tools/graph_transforms/remove_control_dependencies.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc index d4c369f148..901ddb8962 100644 --- a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc +++ b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc @@ -1,10 +1,5 @@ -#include #include #include -#include -#include -#include -#include #include namespace tensorflow { -- GitLab From 873f45e7150929e3427f2a504451de71d974686d Mon Sep 17 00:00:00 2001 From: Thomas Deegan Date: Mon, 29 Jan 2018 15:46:11 -0800 Subject: [PATCH 0016/2939] add copyright and docs to readme --- tensorflow/tools/graph_transforms/README.md | 7 +++++++ .../remove_control_dependencies.cc | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/tensorflow/tools/graph_transforms/README.md b/tensorflow/tools/graph_transforms/README.md index 345d9eadb8..67badb4869 100644 --- a/tensorflow/tools/graph_transforms/README.md +++ b/tensorflow/tools/graph_transforms/README.md @@ -639,6 +639,13 @@ specified devices may not be available. In order to work with graphs like these, you can run this transform to wipe the slate clean and delete the device specifier from all ops. +### remove_control_dependencies + +Args: None \ +Prerequisites: None + +Removes all control dependencies from the graph. 
+ ### remove_nodes Args: diff --git a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc index 901ddb8962..ba6df633be 100644 --- a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc +++ b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc @@ -1,3 +1,17 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ #include #include #include -- GitLab From 51ce6cf02c0a445e1a7c89225353ff20fdb538cb Mon Sep 17 00:00:00 2001 From: Jie Date: Tue, 30 Jan 2018 10:43:21 -0800 Subject: [PATCH 0017/2939] [DEBUG] Converter update 1. ConvertConst float length doesn't match tensor shape. handling default broadcast. -> fixed resnet_200 2. 
Control dependency edge normalizing (remove '^' prefix) -> fixed inception_resnet_v2 --- .../contrib/tensorrt/convert/convert_graph.cc | 2 +- .../contrib/tensorrt/convert/convert_nodes.cc | 39 +++++++++++++------ 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index c7fa4144b1..185451e28b 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -280,7 +280,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( for (auto node : output_names) output_nodes.insert(node); // TODO(sami): this should be passed as a knob!!!! - segment_options.minimum_segment_size = 2; + segment_options.minimum_segment_size = 10; tensorrt::segment::SegmentNodesVector segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( gdef, IsTensorRTCandidate, segment_options, &segments)); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index ff47cdfe4a..6cdfc837fc 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -437,8 +437,14 @@ class Converter { tensorflow::NodeDef const& node_def) { std::vector inputs; for (auto const& input_name : node_def.input()) { - LOG(DEBUG) << "retrieve input: " << input_name; - inputs.push_back(_trt_tensors.at(input_name)); + std::string name = input_name[0] == '^'? 
input_name.substr(1) : input_name; + LOG(DEBUG) << "retrieve input: " << name; + if (_trt_tensors.count(name)) { + inputs.push_back(_trt_tensors.at(name)); + } else { + LOG(FATAL) << "input: " << name << "not availabled for node at, " + << node_def.name(); + } } return inputs; } @@ -462,6 +468,7 @@ class Converter { } tensorflow::Status convert_node(tensorflow::NodeDef const& node_def) { + //LOG(DEBUG) << node_def.DebugString(); std::vector inputs = this->get_inputs(node_def); std::string op = node_def.op(); if (!_op_registry.count(op)) { @@ -1292,20 +1299,24 @@ tensorflow::Status ConvertConst(Converter& ctx, nvinfer1::Dims scalar_shape; if (tensor.dims() > 0) { LOG(DEBUG) << "dimensions: " << tensor.dims(); - weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), - get_tensor_shape(tensor)); + scalar_shape = get_tensor_shape(tensor); + if (get_shape_size(scalar_shape) != weights_tensor.float_val_size()) { + LOG(FATAL) << "Broadcast on weights not supported, at: " + << node_def.name(); + } } else { LOG(DEBUG) << "dimensions: " << tensor.dims(); scalar_shape.nbDims = 1; - scalar_shape.d[0] = 1; + // no dimension provided. 
flatten it + scalar_shape.d[0] = weights_tensor.float_val_size(); scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { scalar_shape.d[i] = 0; scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } - weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), - scalar_shape); } + weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), + scalar_shape); // LOG(INFO) << " add: " << weights_tensor.float_val().data(); // LOG(INFO) << " value: " << (*weights_tensor.float_val().data()); @@ -1317,20 +1328,24 @@ tensorflow::Status ConvertConst(Converter& ctx, nvinfer1::Dims scalar_shape; if (tensor.dims() > 0) { LOG(DEBUG) << "dimensions: " << tensor.dims(); - weights = TRT_ShapedWeights(dtype, weights_tensor.int_val().data(), - get_tensor_shape(tensor)); + scalar_shape = get_tensor_shape(tensor); + if (get_shape_size(scalar_shape) != weights_tensor.int_val_size()) { + LOG(FATAL) << "Broadcast on weights not supported, at: " + << node_def.name(); + } } else { LOG(DEBUG) << "dimensions: " << tensor.dims(); scalar_shape.nbDims = 1; - scalar_shape.d[0] = 1; + // no dimension provided. flatten it + scalar_shape.d[0] = weights_tensor.int_val_size(); scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { scalar_shape.d[i] = 0; scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } - weights = TRT_ShapedWeights(dtype, weights_tensor.int_val().data(), - scalar_shape); } + weights = TRT_ShapedWeights(dtype, weights_tensor.int_val().data(), + scalar_shape); } else if (!weights_tensor.tensor_content().empty()) { LOG(DEBUG) << "TENSOR!!!" 
<< node_def.name(); weights = TRT_ShapedWeights(dtype, weights_tensor.tensor_content().data(), -- GitLab From 359329893e9db38d08be605bad85c3d3eef1a4cd Mon Sep 17 00:00:00 2001 From: Jie Date: Tue, 30 Jan 2018 21:31:10 -0800 Subject: [PATCH 0018/2939] [Debug + Feature] Feature: input tensor shape inference passing output_edge_map to allow ops absorbed by TRT subgraph to infer shape without running another shape infer Debug: fixed BiasAdd broadcasting Debug: fixed rewiring input edge to TRT_ENGINE_OP TODO: incoming edge check (shape / dimension) TRT dimension requirement for 3.1 makes input tensor with 2 dimension (NC) tricky to interpret. --- .../contrib/tensorrt/convert/convert_graph.cc | 38 +++++- .../contrib/tensorrt/convert/convert_nodes.cc | 118 ++++++++++++++---- .../contrib/tensorrt/convert/convert_nodes.h | 1 + .../contrib/tensorrt/kernels/trt_engine_op.cc | 3 +- 4 files changed, 134 insertions(+), 26 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 185451e28b..258a850b21 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -77,8 +77,10 @@ void GetSubGraphIncomingEdges(tensorflow::Graph const& graph, for (tensorflow::Edge const* edge : node->in_edges()) { if (!subgraph_node_ids.count(edge->src()->id()) && !edge->src()->IsSource()) { - LOG(DEBUG) << edge->src()->name() << ", "; + LOG(DEBUG) << edge->src()->name() << " Y, "; incoming_edges->insert(edge); + } else { + LOG(DEBUG) << edge->src()->name() << " N, "; } } } @@ -93,7 +95,10 @@ void GetSubGraphOutgoingEdges(tensorflow::Graph const& graph, for (tensorflow::Edge const* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && !edge->dst()->IsSink()) { + LOG(DEBUG) << edge->dst()->name() << " Y, "; outgoing_edges->insert(edge); + } else { + LOG(DEBUG) << edge->dst()->name() << " N, "; } } } @@ -126,6 +131,7 @@ 
tensorflow::Status ConvertSubGraphToTensorRT( tensorflow::Graph& graph, const std::vector& output_names, const std::set& subgraph_node_ids, size_t max_batch_size, size_t max_workspace_size, + std::unordered_map>* output_edge_map, const tensorflow::grappler::GraphProperties& graph_properties) { tensorflow::EdgeSet subgraph_incoming_edges; GetSubGraphIncomingEdges(graph, subgraph_node_ids, &subgraph_incoming_edges); @@ -164,10 +170,32 @@ tensorflow::Status ConvertSubGraphToTensorRT( tensorflow::NodeDef trt_node_def; TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef( graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs, - max_batch_size, max_workspace_size, graph_properties, &trt_node_def)); + max_batch_size, max_workspace_size, graph_properties, output_edge_map, + &trt_node_def)); tensorflow::Status status; tensorflow::Node* trt_node = graph.AddNode(trt_node_def, &status); + // AddNode does not wire edges. + // Re-map incoming edges to use the new TRT node instead of the orig subgraph + std::map, int> subgraph_edge_to_input_map; + for (size_t i=0; i old_src = {edge->src()->id(), edge->src_output()}; + int new_src_output = subgraph_edge_to_input_map.at(old_src); + graph.AddEdge( + edge->src(), edge->src_output(), trt_node, new_src_output); + graph.RemoveEdge(edge); + } + + + LOG(DEBUG) << "new wiring edges: " << trt_node->in_edges().size(); + for (tensorflow::Edge const* edge : trt_node->in_edges()) { + LOG(DEBUG) << edge->src()->name() << " port: " << edge->src_output(); + } + TF_RETURN_IF_ERROR(status); // Re-map outgoing edges to use the new TRT node instead of the orig subgraph @@ -176,6 +204,7 @@ tensorflow::Status ConvertSubGraphToTensorRT( subgraph_edge_to_output_map.insert({subgraph_outputs.at(i), i}); } TF_RETURN_IF_ERROR(status); + LOG(DEBUG) << "OUT going edge size: " << subgraph_outgoing_edges.size(); for (tensorflow::Edge const* edge : subgraph_outgoing_edges) { std::pair old_src = {edge->src()->id(), edge->src_output()}; int new_src_output = 
subgraph_edge_to_output_map.at(old_src); @@ -280,7 +309,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( for (auto node : output_names) output_nodes.insert(node); // TODO(sami): this should be passed as a knob!!!! - segment_options.minimum_segment_size = 10; + segment_options.minimum_segment_size = 2; tensorrt::segment::SegmentNodesVector segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( gdef, IsTensorRTCandidate, segment_options, &segments)); @@ -292,6 +321,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( } std::unordered_map node_map; TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); + std::unordered_map> output_edge_map; for (std::set const& subgraph_node_names : segments) { std::set subgraph_node_ids; for (std::string const& node_name : subgraph_node_names) { @@ -299,7 +329,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( } TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT( graph, output_names, subgraph_node_ids, max_batch_size, - max_workspace_size, static_graph_properties)); + max_workspace_size, &output_edge_map, static_graph_properties)); } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 6cdfc837fc..bf6a9be8be 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -437,6 +437,17 @@ class Converter { tensorflow::NodeDef const& node_def) { std::vector inputs; for (auto const& input_name : node_def.input()) { + /************************************************************************* + * TODO(jie) handle case 1) here + * Normalizes the inputs and extracts associated metadata: + * 1) Inputs can contain a colon followed by a suffix of characters. + * That suffix may be a single number (e.g. inputName:1) or several + * word characters separated from a number by a colon + * (e.g. inputName:foo:1). 
The + * latter case is used to denote inputs and outputs of functions. + * 2) Control dependency inputs contain caret at the beginning and we + * remove this and annotate the edge as a control dependency. + ************************************************************************/ std::string name = input_name[0] == '^'? input_name.substr(1) : input_name; LOG(DEBUG) << "retrieve input: " << name; if (_trt_tensors.count(name)) { @@ -1261,9 +1272,26 @@ tensorflow::Status ConvertScale(Converter& ctx, } else { LOG(DEBUG) << "NCHW !!!!"; } + + auto dims = tensor->getDimensions(); + LOG(DEBUG) << "tensor dimensions: " << dims.nbDims; + for (int i = 0; i < dims.nbDims; i++) { + LOG(DEBUG) << "i: " << dims.d[i]; + } + dims = weights.shape_; + LOG(DEBUG) << "tensor dimensions: " << dims.nbDims; + for (int i = 0; i < dims.nbDims; i++) { + LOG(DEBUG) << "i: " << dims.d[i]; + } + + nvinfer1::ScaleMode mode = nvinfer1::ScaleMode::kCHANNEL; + if (weights.shape_.d[0] == 1) { + mode = nvinfer1::ScaleMode::kUNIFORM; + } + nvinfer1::IScaleLayer* layer = ctx.network()->addScale( - *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, - weights, empty_weights, empty_weights); + *const_cast(tensor), mode, weights, + empty_weights, empty_weights); nvinfer1::ITensor* output_tensor = layer->getOutput(0); if (data_format == "NHWC") { @@ -1299,11 +1327,21 @@ tensorflow::Status ConvertConst(Converter& ctx, nvinfer1::Dims scalar_shape; if (tensor.dims() > 0) { LOG(DEBUG) << "dimensions: " << tensor.dims(); + LOG(DEBUG) << "size: " << weights_tensor.float_val_size(); scalar_shape = get_tensor_shape(tensor); + for (int i=0; i < scalar_shape.nbDims; i++) LOG(DEBUG) << scalar_shape.d[i]; if (get_shape_size(scalar_shape) != weights_tensor.float_val_size()) { - LOG(FATAL) << "Broadcast on weights not supported, at: " - << node_def.name(); + if (weights_tensor.float_val_size() == 1 || + scalar_shape.d[0] == weights_tensor.float_val_size()) { + scalar_shape.nbDims = 1; + // no dimension provided. 
flatten it + scalar_shape.d[0] = weights_tensor.float_val_size(); + scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; + } else { + LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" + << " kUNIFORM, at: " << node_def.name(); } + } } else { LOG(DEBUG) << "dimensions: " << tensor.dims(); scalar_shape.nbDims = 1; @@ -1330,9 +1368,17 @@ tensorflow::Status ConvertConst(Converter& ctx, LOG(DEBUG) << "dimensions: " << tensor.dims(); scalar_shape = get_tensor_shape(tensor); if (get_shape_size(scalar_shape) != weights_tensor.int_val_size()) { - LOG(FATAL) << "Broadcast on weights not supported, at: " - << node_def.name(); + if (weights_tensor.int_val_size() == 1 || + scalar_shape.d[0] == weights_tensor.int_val_size()) { + scalar_shape.nbDims = 1; + // no dimension provided. flatten it + scalar_shape.d[0] = weights_tensor.int_val_size(); + scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; + } else { + LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" + << " kUNIFORM, at: " << node_def.name(); } + } } else { LOG(DEBUG) << "dimensions: " << tensor.dims(); scalar_shape.nbDims = 1; @@ -1747,6 +1793,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( const std::vector>& output_inds, size_t max_batch_size, size_t max_workspace_size, const tensorflow::grappler::GraphProperties& graph_properties, + std::unordered_map>* output_edge_map, tensorflow::NodeDef* trt_node) { // Visit nodes in reverse topological order and construct the TRT network. 
@@ -1800,21 +1847,39 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( int output_idx = input.second; tensorflow::Node* node = graph.FindNodeId(node_id); auto node_name = node->name(); - input_names.push_back(node_name); // insert original node name without port + // input_names should use the node name in the graph + // insert original node name without port + input_names.push_back(node_name); + + auto tensor_name = node_name; + if (output_idx != 0) + tensor_name = tensor_name + ":" + std::to_string(output_idx); + + LOG(DEBUG) << "input name: " << node_name << " tensor_name: " << tensor_name << " idx: " << output_idx; + + auto shape_inference_node_name = node_name; + auto shape_inference_output_idx = output_idx; + // rewire the shape inference to original node in the graph + if (output_edge_map->count(tensor_name)) { + shape_inference_node_name = output_edge_map->at(tensor_name).second; + shape_inference_output_idx = output_edge_map->at(tensor_name).first; + } + LOG(DEBUG) << "shapeinference name: " << shape_inference_node_name << " idx: " << shape_inference_output_idx; + // TODO(jie): alternative :) - // tensorflow::DataType tf_dtype = node->output_type(output_idx); - if (!graph_properties.HasOutputProperties(node_name)) + // tensorflow::DataType tf_dtype = node->output_type(); + if (!graph_properties.HasOutputProperties(shape_inference_node_name)) return tensorflow::errors::Internal("failed to find input node: " + - node_name); + shape_inference_node_name); - auto op_info_vec = graph_properties.GetOutputProperties(node_name); - if (static_cast(op_info_vec.size()) < output_idx) + auto op_info_vec = graph_properties.GetOutputProperties(shape_inference_node_name); + if (static_cast(op_info_vec.size()) <= shape_inference_output_idx) return tensorflow::errors::Internal( - "accessing output index of: " + std::to_string(output_idx) + - ", at node: " + node_name + "with output entry from shape_map: " + + "accessing output index of: " + 
std::to_string(shape_inference_output_idx) + + ", at node: " + shape_inference_node_name + " with output entry from shape_map: " + std::to_string(op_info_vec.size())); - auto op_info = op_info_vec.at(output_idx); + auto op_info = op_info_vec.at(shape_inference_output_idx); tensorflow::DataType tf_dtype = op_info.dtype(); input_dtypes.push_back(tf_dtype); @@ -1822,9 +1887,9 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); TF_CHECK_OK(convert_dtype(tf_dtype, &dtype)); - LOG(DEBUG) << "accessing output index of: " << std::to_string(output_idx) - << ", at node: " << node_name - << "with output entry from shape_map: " + LOG(DEBUG) << "accessing output index of: " << std::to_string(shape_inference_output_idx) + << ", at node: " << shape_inference_node_name + << " with output entry from shape_map: " << std::to_string(op_info_vec.size()); // TODO(ben,jie): update TRT input format/dimension @@ -1866,15 +1931,26 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( LOG(DEBUG) << "finished conversion"; + // TODO(sami,ben,jie): proper naming! + static int static_id = 0; + std::string engine_name = "my_trt_op" + std::to_string(static_id++); + // Gather output metadata std::vector output_names; std::vector output_dtypes; + int trt_engine_op_output_idx = 0; for (std::pair const& output : output_inds) { int node_id = output.first; int output_idx = output.second; tensorflow::Node* node = graph.FindNodeId(node_id); std::string op_name = node->name(); std::string tensor_name = op_name; + + output_edge_map->insert( + {trt_engine_op_output_idx == 0 ? 
+ engine_name : engine_name + std::to_string(trt_engine_op_output_idx), + {output_idx, tensor_name}}); + if (output_idx != 0) tensor_name = tensor_name + ":" + std::to_string(output_idx); LOG(DEBUG) << "output tensor name: " << tensor_name; @@ -1923,12 +1999,12 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( LOG(INFO) << "finished engine"; // Build the TRT op - // TODO(sami,ben,jie): proper naming! - static int static_id = 0; tensorflow::NodeDefBuilder op_builder( - "my_trt_op" + std::to_string(static_id++), "TRTEngineOp"); + engine_name, "TRTEngineOp"); std::vector income_edges; + LOG(DEBUG) << "input edge size: " << input_names.size(); for (size_t i = 0; i < input_names.size(); ++i) { + LOG(DEBUG) << "input edges: " << std::to_string(i) << " " << input_names.at(i); int output_idx = input_inds.at(i).second; // we wired up the input here already, it is redundant to do it again in // ConvertSubGraphToTensorRT(convert_graph.cc) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index dc59c37892..23ca9fcc82 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -36,6 +36,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( output_inds, // {node_id, output_idx} size_t max_batch_size, size_t max_workspace_size, const tensorflow::grappler::GraphProperties& graph_prop, + std::unordered_map>* output_edge_map, tensorflow::NodeDef* trt_node); } // namespace convert } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index a1524a592a..445900f08c 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -175,7 +175,8 @@ void TRTEngineOp::Compute(OpKernelContext* context) { ->CudaStreamMemberHack())); trt_context_ptr_->enqueue(nbBatch, &buffers[0], *stream, nullptr); - 
cudaStreamSynchronize(*stream); + // sync should be done by TF. + //cudaStreamSynchronize(*stream); } REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp); -- GitLab From c91050a97b9816627865dd367c93c3ef88ca212f Mon Sep 17 00:00:00 2001 From: Jie Date: Wed, 31 Jan 2018 14:35:49 -0800 Subject: [PATCH 0019/2939] [Feature] subgraph conversion graceful failure conversion failure would result in skipping current subgraph. incoming edge check. require subgraph with incoming edge passing 4 dimensional tensor. TODO binary op -> still needs transpose (since current layout optimization is not working properly --- .../contrib/tensorrt/convert/convert_graph.cc | 17 +++++++++++++---- .../contrib/tensorrt/convert/convert_nodes.cc | 9 ++++++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 258a850b21..34a2e9ce6a 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -143,6 +143,7 @@ tensorflow::Status ConvertSubGraphToTensorRT( for (tensorflow::Edge const* edge : subgraph_incoming_edges) { subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); } + std::set> subgraph_outputs_set; // Collect outputs referenced from output_names auto output_name_to_index_map = BuildTensorNameMap(output_names); @@ -168,11 +169,11 @@ tensorflow::Status ConvertSubGraphToTensorRT( subgraph_outputs_set.begin(), subgraph_outputs_set.end()); // Build TensorRT node and add it to the graph tensorflow::NodeDef trt_node_def; + tensorflow::Status status; TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef( graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs, max_batch_size, max_workspace_size, graph_properties, output_edge_map, &trt_node_def)); - tensorflow::Status status; tensorflow::Node* trt_node = graph.AddNode(trt_node_def, &status); // AddNode does not wire 
edges. @@ -253,6 +254,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( // virtual cluster tensorflow::DeviceProperties device_properties; + device_properties.set_type("GPU"); device_properties.mutable_environment()->insert({"architecture", "6"}); gCluster = @@ -322,14 +324,21 @@ tensorflow::Status ConvertGraphDefToTensorRT( std::unordered_map node_map; TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); std::unordered_map> output_edge_map; + int count = 0; for (std::set const& subgraph_node_names : segments) { std::set subgraph_node_ids; for (std::string const& node_name : subgraph_node_names) { subgraph_node_ids.insert(node_map.at(node_name)->id()); } - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT( - graph, output_names, subgraph_node_ids, max_batch_size, - max_workspace_size, &output_edge_map, static_graph_properties)); + tensorflow::Status status = + ConvertSubGraphToTensorRT(graph, output_names, subgraph_node_ids, + max_batch_size, max_workspace_size, &output_edge_map, + static_graph_properties); + if ( status != tensorflow::Status::OK()) { + LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count + << " due to: \n" << status.ToString() << "SKIPPING......"; + } + count++; } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index bf6a9be8be..da6252b25d 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -1866,8 +1866,6 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( } LOG(DEBUG) << "shapeinference name: " << shape_inference_node_name << " idx: " << shape_inference_output_idx; - // TODO(jie): alternative :) - // tensorflow::DataType tf_dtype = node->output_type(); if (!graph_properties.HasOutputProperties(shape_inference_node_name)) return tensorflow::errors::Internal("failed to find input node: " + shape_inference_node_name); 
@@ -1885,7 +1883,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( input_dtypes.push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); - TF_CHECK_OK(convert_dtype(tf_dtype, &dtype)); + TF_RETURN_IF_ERROR(convert_dtype(tf_dtype, &dtype)); LOG(DEBUG) << "accessing output index of: " << std::to_string(shape_inference_output_idx) << ", at node: " << shape_inference_node_name @@ -1896,6 +1894,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( nvinfer1::DimsCHW input_dim_psuedo_chw; for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; + // TODO(jie): TRT 3.x only support 4 dimensional input tensor. + // update the code once TRT 4.0 comes out. + if (op_info.shape().dim_size() != 4) + return tensorflow::errors::Unimplemented("require 4 dimensional input"); + for (int i = 1; i < op_info.shape().dim_size(); i++) { LOG(DEBUG) << "dimension: " << i << " , size: " << op_info.shape().dim(i).size(); -- GitLab From 45487b143f890eac31844bfdea171954ddae9e38 Mon Sep 17 00:00:00 2001 From: Jie Date: Wed, 31 Jan 2018 21:13:07 -0800 Subject: [PATCH 0020/2939] [UPDATE] 1. debug binary ops: transpose added again since TF layout optimization is not sufficient 2. debug consecutive trt_engine_op binding names TODO: binding names + input wiring needs refactoring Also change the trt_engine_op attrs (input/output nodes might not be necessary --- .../contrib/tensorrt/convert/convert_nodes.cc | 99 ++++++++++++------- 1 file changed, 63 insertions(+), 36 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index da6252b25d..5df1132f01 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -449,6 +449,10 @@ class Converter { * remove this and annotate the edge as a control dependency. ************************************************************************/ std::string name = input_name[0] == '^'? 
input_name.substr(1) : input_name; + auto first = name.find_first_of(':'); + if (first != std::string::npos && first+2 == name.size() && name[first+1]=='0') + name.erase(first); + LOG(DEBUG) << "retrieve input: " << name; if (_trt_tensors.count(name)) { inputs.push_back(_trt_tensors.at(name)); @@ -833,9 +837,12 @@ tensorflow::Status BinaryTensorOpWeight( auto dims_w = weights.shape_; auto dims_t = tensor->getDimensions(); - // default to channel-wise + // default to element-wise auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; + // TODO(jie): maybe use a permuatation instead to support more cases; + bool permutation_flag = false; + /* if (weights.count() == 1) { LOG(DEBUG) << "UNIFORM"; @@ -857,44 +864,63 @@ tensorflow::Status BinaryTensorOpWeight( scale_mode = nvinfer1::ScaleMode::kUNIFORM; } else { // no broadcasting on Batch dimension; - assert(dims_w.d[0]==1); - - // broadcasting on Channel dimension only allowed in kUNIFORM - assert(dims_w.d[1]==dims_t.d[0]); - assert(dims_w.nbDims==dims_t.nbDims); - - // default is element; - for (int i=2; i permutation(dims_t.nbDims + 1); - if (scale_mode == nvinfer1::ScaleMode::kCHANNEL && dims_t.nbDims > 1) { - // we swap the last dimension into channel for trt. - // because of tensorflow default broadcasting rules. - for (int i = 0; i < static_cast(permutation.size()); i++) { - permutation[i] = i; + if (permutation_flag) { + if (scale_mode == nvinfer1::ScaleMode::kCHANNEL && dims_t.nbDims > 1) { + // we swap the last dimension into channel for trt. + // because of tensorflow default broadcasting rules. 
+ for (int i = 0; i < static_cast(permutation.size()); i++) { + permutation[i] = i; + } + permutation[1] = dims_t.nbDims; + permutation[dims_t.nbDims] = 1; + tensor = ctx.transposeTensor(const_cast(tensor), + permutation); + } else { + return tensorflow::errors::InvalidArgument( + "Transpose cannot be applied, " + node_def.name()); } - permutation[1] = dims_t.nbDims; - permutation[dims_t.nbDims] = 1; - tensor = ctx.transposeTensor(const_cast(tensor), - permutation); } - */ // prepare weights TRT_ShapedWeights shiftWeights(weights.type_); @@ -923,11 +949,9 @@ tensorflow::Status BinaryTensorOpWeight( nvinfer1::ITensor* output_tensor = layer->getOutput(0); // transpose back dimension - /* - if (scale_mode == nvinfer1::ScaleMode::kCHANNEL && dims_t.nbDims > 1) { + if (permutation_flag) { output_tensor = ctx.transposeTensor(output_tensor, permutation); } - */ // pass the output outputs->push_back(TRT_TensorOrWeights(output_tensor)); @@ -1847,9 +1871,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( int output_idx = input.second; tensorflow::Node* node = graph.FindNodeId(node_id); auto node_name = node->name(); + // input_names should use the node name in the graph + // here it should be the input tensor name -> matching the binding // insert original node name without port - input_names.push_back(node_name); + // input_names.push_back(node_name); auto tensor_name = node_name; if (output_idx != 0) @@ -1910,6 +1936,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( if (output_idx != 0) input_tensor_name = node_name + ":" + std::to_string(output_idx); + input_names.push_back(input_tensor_name); nvinfer1::ITensor* input_tensor = converter.network()->addInput( input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); @@ -1951,9 +1978,9 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( output_edge_map->insert( {trt_engine_op_output_idx == 0 ? 
- engine_name : engine_name + std::to_string(trt_engine_op_output_idx), + engine_name : engine_name + ":" + std::to_string(trt_engine_op_output_idx), {output_idx, tensor_name}}); - + trt_engine_op_output_idx++; if (output_idx != 0) tensor_name = tensor_name + ":" + std::to_string(output_idx); LOG(DEBUG) << "output tensor name: " << tensor_name; @@ -1999,7 +2026,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // engine_out << engine_plan_string; // engine_out.close(); - LOG(INFO) << "finished engine"; + LOG(INFO) << "finished engine" << engine_name; // Build the TRT op tensorflow::NodeDefBuilder op_builder( -- GitLab From 10a642da150356d1072e9a5197967f3f3a2bcd7b Mon Sep 17 00:00:00 2001 From: Jie Date: Thu, 1 Feb 2018 07:13:40 -0800 Subject: [PATCH 0021/2939] [UPDATE] converter update: MatMul added TODO: reshape --- .../contrib/tensorrt/convert/convert_graph.cc | 2 +- .../contrib/tensorrt/convert/convert_nodes.cc | 67 ++++++++++++++++++- 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 34a2e9ce6a..254a428104 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -61,7 +61,7 @@ bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { static const std::set candidate_ops = { "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", "Add", "Mul", "Sub", "Rsqrt", "Pad" , "Mean", - "AvgPool", "ConcatV2", "DepthwiseConv2dNative" + "AvgPool", "ConcatV2", "DepthwiseConv2dNative" , "MatMul" // TODO(ben,jie): ... 
}; if (output_nodes.count(node_def.name())) return false; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 5df1132f01..6c0ee5e527 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -347,7 +347,7 @@ template <> tensorflow::DataType TFAttrs::get(std::string key) const { return this->at(key)->type(); } - +// TODO(jie): reorder4 & reorder2 should be merged? template void reorder4(nvinfer1::DimsNCHW shape, T const* idata, nvinfer1::DimsNCHW istrides, T* odata, @@ -365,6 +365,38 @@ void reorder4(nvinfer1::DimsNCHW shape, T const* idata, } } +template +void reorder2(nvinfer1::DimsHW shape, T const* idata, + nvinfer1::DimsHW istrides, T* odata, + nvinfer1::DimsHW ostrides) { + for (int h = 0; h < shape.h(); ++h) { + for (int w = 0; w < shape.w(); ++w) { + odata[h * ostrides.h() + w * ostrides.w()] + = idata[h * ostrides.h() + w * ostrides.w()]; + } + } +} + +// TODO(jie): fail to tensorflow!! 
+void reorder_ck_to_kc(TRT_ShapedWeights const& iweights, + TRT_ShapedWeights* oweights) { + int c = iweights.shape_.d[0]; + int k = iweights.shape_.d[1]; + oweights->shape_.d[0] = k; + oweights->shape_.d[1] = c; + nvinfer1::DimsHW istrides = {1, k}; + nvinfer1::DimsHW ostrides = {c, 1}; + switch (iweights.type_) { + case tensorflow::DataType::DT_FLOAT: + reorder2( + {k, c}, static_cast(iweights.values_), istrides, + static_cast(const_cast(oweights->values_)), ostrides); + break; + default: + LOG(FATAL) << "!!!!!!!!!!!!!!!!!!!!!!!!broke!!!!!!!!!!!!"; + } +} + void reorder_rsck_to_kcrs(TRT_ShapedWeights const& iweights, TRT_ShapedWeights* oweights, int nbGroups) { CHECK_EQ(iweights.type_, oweights->type_); @@ -382,7 +414,6 @@ void reorder_rsck_to_kcrs(TRT_ShapedWeights const& iweights, oweights->shape_.d[1] = c*nbGroups; oweights->shape_.d[2] = r; oweights->shape_.d[3] = s; - // nvinfer1::DimsNCHW istrides = {1, s, c*r*s, r*s}; nvinfer1::DimsNCHW istrides = {1, k, s * k * c, c * k}; nvinfer1::DimsNCHW ostrides = {c * r * s, r * s, s, 1}; switch (iweights.type_) { @@ -1782,6 +1813,37 @@ tensorflow::Status ConvertConcat( return tensorflow::Status::OK(); } +tensorflow::Status ConvertMatMul( + Converter& ctx, + tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + + // TODO(jie): transpose! 
+ TFAttrs attrs(node_def); + //bool transpose_w = bool(attrs->at("transpose_b")->i()); + + // tensor after transpose (NCHW) + auto tensor_dim = tensor->getDimensions(); + + TRT_ShapedWeights weights_ck = inputs.at(1).weights(); + TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_ck); + reorder_ck_to_kc(weights_ck, &weights); + TRT_ShapedWeights biases(weights.type_); + + int noutput = weights.shape_.d[0]; + + nvinfer1::IFullyConnectedLayer* layer = + ctx.network()->addFullyConnected(*const_cast(tensor), + noutput, weights, biases); + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); + +} + void Converter::register_op_converters() { // vgg_16 slim implementation _op_registry["Placeholder"] = ConvertPlaceholder; @@ -1804,6 +1866,7 @@ void Converter::register_op_converters() { _op_registry["Rsqrt"] = ConvertUnary; _op_registry["Mean"] = ConvertReduce; _op_registry["Pad"] = ConvertPad; + _op_registry["MatMul"] = ConvertMatMul; // TODO(ben,jie): Add more ops _op_registry["ConcatV2"] = ConvertConcat; -- GitLab From c5d9369831bfcb66ea54f06349ebae5979c4912d Mon Sep 17 00:00:00 2001 From: Jie Date: Thu, 1 Feb 2018 09:43:24 -0800 Subject: [PATCH 0022/2939] [debug] binary op mode/dimension bug fixed TODO: reshape / debug Matmul --- .../contrib/tensorrt/convert/convert_graph.cc | 3 ++- .../contrib/tensorrt/convert/convert_nodes.cc | 24 ++++++++++++------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 254a428104..e9ab542f31 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -61,7 +61,8 @@ bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { static const std::set candidate_ops = { "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", "Add", 
"Mul", "Sub", "Rsqrt", "Pad" , "Mean", - "AvgPool", "ConcatV2", "DepthwiseConv2dNative" , "MatMul" + "AvgPool", "ConcatV2", "DepthwiseConv2dNative" //, "MatMul", + //"Reshape" // TODO(ben,jie): ... }; if (output_nodes.count(node_def.name())) return false; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 6c0ee5e527..c697093d12 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -897,17 +897,22 @@ tensorflow::Status BinaryTensorOpWeight( // no broadcasting on Batch dimension; LOG(DEBUG) << "WEIGHTS DIM: " << dims_w.nbDims << " tensor DIM: " << dims_t.nbDims; - if (dims_w.nbDims==dims_t.nbDims && dims_w.d[0]==1) { - for (int i=1; i Date: Thu, 1 Feb 2018 09:56:43 -0800 Subject: [PATCH 0023/2939] Adding Release notes for r1.6. --- RELEASE.md | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 5 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index af6440acef..728a840002 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,11 +1,43 @@ -# Release 1.5.0 +# Release 1.6.0 ## Breaking Changes * Prebuilt binaries are now built against CUDA 9.0 and cuDNN 7. -* Our Linux binaries are built using ubuntu 16 containers, potentially - introducing glibc incompatibility issues with ubuntu 14. -* Starting from 1.6 release, our prebuilt binaries will use AVX instructions. - This may break TF on older CPUs. +* Prebuilt binaries will use AVX instructions. This may break TF on older CPUs. + +## Major Features And Improvements +* New Optimizer internal API for non-slot variables. Descendants of AdamOptimizer that access _beta[12]_power will need to be updated. +* `tf.estimator.{FinalExporter,LatestExporter}` now export stripped SavedModels. This improves forward compatibility of the SavedModel. +* FFT support added to XLA CPU/GPU. 
+ +## Bug Fixes and Other Changes +* Documentation updates: + * Added a second version of Getting Started, which is aimed at ML +newcomers. + * Clarified documentation on `resize_images.align_corners` parameter. + * Additional documentation for TPUs. +* Google Cloud Storage (GCS): + * Add client-side throttle. + * Add a `FlushCaches()` method to the FileSystem interface, with an implementation for GcsFileSystem. +* Other: + * Add `tf.contrib.distributions.Kumaraswamy`. + * `RetryingFileSystem::FlushCaches()` calls the base FileSystem's `FlushCaches()`. + * Add auto_correlation to distributions. + * Add `tf.contrib.distributions.Autoregressive`. + * Add SeparableConv1D layer. + * Add convolutional Flipout layers. + * When both inputs of `tf.matmul` are bfloat16, it returns bfloat16, instead of float32. + * Added `tf.contrib.image.connected_components`. + * Add `tf.contrib.framework.CriticalSection` that allows atomic variable access. + * Output variance over trees predictions for classification tasks. + * For `pt` and `eval` commands, allow writing tensor values to filesystem as numpy files. + * gRPC: Propagate truncated errors (instead of returning gRPC internal error). + * Augment parallel_interleave to support 2 kinds of prefetching. + * Improved XLA support for C64-related ops log, pow, atan2, tanh. + * Add probabilistic convolutional layers. + +## API Changes +* Introducing prepare_variance boolean with default setting to False for backward compatibility. +* Move `layers_dense_variational_impl.py` to `layers_dense_variational.py`. ## Known Bugs * Using XLA:GPU with CUDA 9 and CUDA 9.1 results in garbage results and/or @@ -28,6 +60,42 @@ TensorFlow will print a warning if you use XLA:GPU with a known-bad version of CUDA; see e00ba24c4038e7644da417ddc639169b6ea59122. 
+## Thanks to our Contributors + +This release contains contributions from many people at Google, as well as: + +4d55397500, Ag Ramesh, Aiden Scandella, Akimasa Kimura, Alex Rothberg, Allen Goodman, +amilioto, Andrei Costinescu, Andrei Nigmatulin, Anjum Sayed, Anthony Platanios, +Anush Elangovan, Armando Fandango, Ashish Kumar Ram, Ashwini Shukla, Ben, Bhavani Subramanian, +Brett Koonce, Carl Thomé, cclauss, Cesc, Changming Sun, Christoph Boeddeker, Clayne Robison, +Clemens Schulz, Clint (Woonhyuk Baek), codrut3, Cole Gerdemann, Colin Raffel, Daniel Trebbien, +Daniel Ylitalo, Daniel Zhang, Daniyar, Darjan Salaj, Dave Maclachlan, David Norman, Dong--Jian, +dongsamb, dssgsra, Edward H, eladweiss, elilienstein, Eric Lilienstein, error.d, Eunji Jeong, fanlu, +Florian Courtial, fo40225, Fred, Gregg Helt, Guozhong Zhuang, Hanchen Li, hsm207, hyunyoung2, +ImSheridan, Ishant Mrinal Haloi, Jacky Ko, Jay Young, Jean Flaherty, Jerome, JerrikEph, Jesse +Kinkead, jfaath, Jian Lin, jinghuangintel, Jiongyan Zhang, Joel Hestness, Joel Shor, Johnny Chan, +Julian Niedermeier, Julian Wolff, JxKing, K-W-W, Karl Lessard, Kasper Marstal, Keiji Ariyama, +Koan-Sin Tan, Loki Der Quaeler, Loo Rong Jie, Luke Schaefer, Lynn Jackson, ManHyuk, Matt Basta, +Matt Smith, Matthew Schulkind, Michael, michaelkhan3, Miguel Piedrafita, Mikalai Drabovich, +Mike Knapp, mjwen, mktozk, Mohamed Aly, Mohammad Ashraf Bhuiyan, Myungjoo Ham, Naman Bhalla, +Namrata-Ibm, Nathan Luehr, nathansilberman, Netzeband, Niranjan Hasabnis, Omar Aflak, Ozge +Yalcinkaya, Parth P Panchal, patrickzzy, Patryk Chrabaszcz, Paul Van Eck, Paweł Kapica, Peng Yu, +Philip Yang, Pierre Blondeau, Po-Hsien Chu, powderluv, Puyu Wang, Rajendra Arora, Rasmus, Renat +Idrisov, resec, Robin Richtsfeld, Ronald Eddy Jr, Sahil Singh, Sam Matzek, Sami Kama, sandipmgiri, +Santiago Castro, Sayed Hadi Hashemi, Scott Tseng, Sergii Khomenko, Shahid, Shengpeng Liu, Shreyash +Sharma, Shrinidhi Kl, Simone Cirillo, simsicon, Stanislav Levental, 
starsblinking, Stephen Lumenta, +Steven Hickson, Su Tang, Taehoon Lee, Takuya Wakisaka, Ted Chang, Ted Ying, Tijmen Verhulsdonck, +Timofey Kondrashov, vade, vaibhav, Valentin Khrulkov, vchigrin, Victor Costan, Viraj Navkal, +Vivek Rane, wagonhelm, Yan Facai (颜发才), Yanbo Liang, Yaroslav Bulatov, yegord, Yong Tang, +Yoni Tsafir, yordun, Yuan (Terry) Tang, Yuxin Wu, zhengdi, Zhengsheng Wei, 田传武 + +# Release 1.5.0 + +## Breaking Changes +* Prebuilt binaries are now built against CUDA 9.0 and cuDNN 7. +* Starting from 1.6 release, our prebuilt binaries will use AVX instructions. + This may break TF on older CPUs. + ## Major Features And Improvements * [Eager execution](https://github.com/tensorflow/tensorflow/tree/r1.5/tensorflow/contrib/eager) preview version is now available. -- GitLab From 68c52b926eb8cba2b43a37cde4a9658654427e70 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Thu, 1 Feb 2018 10:27:29 -0800 Subject: [PATCH 0024/2939] Adding the known bugs to 1.5 as well. --- RELEASE.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 728a840002..0fad3b5d41 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -214,6 +214,27 @@ Yoni Tsafir, yordun, Yuan (Terry) Tang, Yuxin Wu, zhengdi, Zhengsheng Wei, 田 * Minor refactor: move stats files from `stochastic` to `common` and remove `stochastic`. +## Known Bugs +* Using XLA:GPU with CUDA 9 and CUDA 9.1 results in garbage results and/or + `CUDA_ERROR_ILLEGAL_ADDRESS` failures. + + Google discovered in mid-December 2017 that the PTX-to-SASS compiler in CUDA 9 + and CUDA 9.1 sometimes does not properly compute the carry bit when + decomposing 64-bit address calculations with large offsets (e.g. `load [x + + large_constant]`) into 32-bit arithmetic in SASS. + + As a result, these versions of `ptxas` miscompile most XLA programs which use + more than 4GB of temp memory. This results in garbage results and/or + `CUDA_ERROR_ILLEGAL_ADDRESS` failures. 
+ + A fix in CUDA 9.1.121 is expected in late February 2018. We do not expect a + fix for CUDA 9.0.x. Until the fix is available, the only workaround is to + [downgrade](https://developer.nvidia.com/cuda-toolkit-archive) to CUDA 8.0.x + or disable XLA:GPU. + + TensorFlow will print a warning if you use XLA:GPU with a known-bad version of + CUDA; see e00ba24c4038e7644da417ddc639169b6ea59122. + ## Thanks to our Contributors This release contains contributions from many people at Google, as well as: -- GitLab From b52390f02678cf3e56319a10bde19a930ca9de78 Mon Sep 17 00:00:00 2001 From: Jie Date: Thu, 1 Feb 2018 11:07:53 -0800 Subject: [PATCH 0025/2939] [update] converter update: reshape implemented. I cannot support reshape or MatMul at this moment because of the backend. TODO: wait until TRT 4.0 for backend support on reshape. --- .../contrib/tensorrt/convert/convert_graph.cc | 2 +- .../contrib/tensorrt/convert/convert_nodes.cc | 72 ++++++++++++++++++- 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index e9ab542f31..573394f309 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -61,7 +61,7 @@ bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { static const std::set candidate_ops = { "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", "Add", "Mul", "Sub", "Rsqrt", "Pad" , "Mean", - "AvgPool", "ConcatV2", "DepthwiseConv2dNative" //, "MatMul", + "AvgPool", "ConcatV2", "DepthwiseConv2dNative" //, "MatMul", //"Reshape" // TODO(ben,jie): ... 
}; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index c697093d12..09c1b959ce 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -1849,6 +1849,76 @@ tensorflow::Status ConvertMatMul( } +tensorflow::Status ConvertReshape(Converter& ctx, + tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { + if (inputs.size() != 2 || !inputs.at(0).is_tensor() || + !inputs.at(1).is_weights()) + return tensorflow::errors::InvalidArgument( + "Input expects tensor and weights, at" + node_def.name()); + + // implement tensor binaryOp weight [channel wise] for now; + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + auto dims = tensor->getDimensions(); + // restore implicit batch dimension + int nbDims = dims.nbDims + 1; + + TRT_ShapedWeights shape = inputs.at(1).weights(); + + TFAttrs attrs(node_def); + + auto padding_type = attrs.get("Tshape"); + + if (shape.shape_.nbDims != 1) + return tensorflow::errors::InvalidArgument( + "reshape new shape is not 1 dimensional, at " + node_def.name()); + + // Only expect to handle INT32 as attributes for now + if (padding_type != tensorflow::DataType::DT_INT32) + return tensorflow::errors::Unimplemented( + "reshape new shape supports only DT_INT32, at "+ node_def.name()); + + auto shape_data = static_cast(const_cast(shape.values_)); + + if (shape_data[0] != -1) + return tensorflow::errors::InvalidArgument( + "reshape new shape first dimension is not -1, at "+ node_def.name()); + + auto shape_num_dims = shape.shape_.d[0]; + LOG(DEBUG) << "shape dimensions: " << shape_num_dims; + int volume_w = 1; + for (int i = 1; i < shape.shape_.d[0]; i++) + volume_w *= shape_data[i]; + + int volume_t = 1; + for (int i = 0; i < dims.nbDims; i++) + volume_t *= dims.d[i]; + + LOG(DEBUG) << "volume: " << volume_t << " volume weights: " << volume_w; + if (volume_w != 
volume_t) + return tensorflow::errors::InvalidArgument( + "volume does not agree between tensor and new shape, at "+ node_def.name()); + + nvinfer1::IShuffleLayer* layer = + ctx.network()->addShuffle(*const_cast(tensor)); + + nvinfer1::Dims reshapeDims; + LOG(DEBUG) << "new dimension: " << shape_num_dims-1; + reshapeDims.nbDims = shape_num_dims-1; + for (int32_t i = 0; i < reshapeDims.nbDims; ++i) { + reshapeDims.d[i] = shape_data[i+1]; + } + layer->setReshapeDimensions(reshapeDims); + LOG(DEBUG) << "new dimension: " << shape_num_dims-1; + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + auto dims_output = output_tensor->getDimensions(); + LOG(DEBUG) << "output tensor dimension:" << dims_output.nbDims; + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + void Converter::register_op_converters() { // vgg_16 slim implementation _op_registry["Placeholder"] = ConvertPlaceholder; @@ -1875,7 +1945,7 @@ void Converter::register_op_converters() { _op_registry["ConcatV2"] = ConvertConcat; _op_registry["MatMul"] = ConvertMatMul; - //_op_registry["Reshape"] = ConvertReshape; + _op_registry["Reshape"] = ConvertReshape; } } // namespace -- GitLab From c63fb0841bc020ddee2a4eba337eca1cc49c2eff Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 2 Feb 2018 11:14:18 -0800 Subject: [PATCH 0026/2939] Update version to 1.6.0-rc0. (#16701) * Updating the version to 1.6.0-rc0. * Removing the escape character. * Updating the cloud tpu version. 
--- .../eager/python/examples/mnist/mnist.py | 2 +- .../contrib/tpu/profiler/pip_package/setup.py | 2 +- tensorflow/core/public/version.h | 4 ++-- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +++++++++---------- tensorflow/docs_src/install/install_linux.md | 22 +++++++++---------- tensorflow/docs_src/install/install_mac.md | 10 ++++----- .../docs_src/install/install_sources.md | 10 ++++++--- tensorflow/tools/docker/Dockerfile.devel | 2 +- .../tools/docker/Dockerfile.devel-cpu-mkl | 2 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +- tensorflow/tools/pip_package/setup.py | 2 +- 13 files changed, 44 insertions(+), 40 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/mnist/mnist.py b/tensorflow/contrib/eager/python/examples/mnist/mnist.py index ed7dbc8904..772f59562b 100644 --- a/tensorflow/contrib/eager/python/examples/mnist/mnist.py +++ b/tensorflow/contrib/eager/python/examples/mnist/mnist.py @@ -39,7 +39,7 @@ class MNISTModel(tfe.Network): """MNIST Network. 
Network structure is equivalent to: - https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/examples/tutorials/mnist/mnist_deep.py + https://github.com/tensorflow/tensorflow/blob/r1.6/tensorflow/examples/tutorials/mnist/mnist_deep.py and https://github.com/tensorflow/models/blob/master/tutorials/image/mnist/convolutional.py diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py index 3dffebe668..cb61984799 100644 --- a/tensorflow/contrib/tpu/profiler/pip_package/setup.py +++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py @@ -20,7 +20,7 @@ from __future__ import print_function from setuptools import setup -_VERSION = '1.5.0-rc1' +_VERSION = '1.6.0-rc0' CONSOLE_SCRIPTS = [ 'capture_tpu_profile=cloud_tpu_profiler.main:run_main', diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index b02f899b87..50bfa91267 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -19,12 +19,12 @@ limitations under the License. // TensorFlow uses semantic versioning, see http://semver.org/. #define TF_MAJOR_VERSION 1 -#define TF_MINOR_VERSION 5 +#define TF_MINOR_VERSION 6 #define TF_PATCH_VERSION 0 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. 
"-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "" +#define TF_VERSION_SUFFIX "-rc0" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 14add7c77e..a783205b4a 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.5.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0-rc0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index d2af9d9843..5249e04615 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.5.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0-rc0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index e5388c4b1e..0c6c773e62 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.5.0 + 1.6.0-rc0 ``` @@ -65,7 +65,7 
@@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.5.0 + 1.6.0-rc0 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.5.0 + 1.6.0-rc0 org.tensorflow libtensorflow_jni_gpu - 1.5.0 + 1.6.0-rc0 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.5.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.5.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0-rc0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.5.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.5.0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0-rc0.zip). 3. 
Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.5.0.jar HelloTF.java
+
javac -cp libtensorflow-1.6.0-rc0.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.5.0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.6.0-rc0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.5.0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.6.0-rc0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index cd8c14599f..105b225177 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -188,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -293,7 +293,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl @@ -648,14 +648,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -667,14 +667,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -686,14 +686,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp35-cp35m-linux_x86_64.whl
 
@@ -705,14 +705,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.5.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index f49d3a2f08..a6ea548cfb 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -115,7 +115,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -238,7 +238,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -347,7 +347,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py2-none-any.whl @@ -520,7 +520,7 @@ This section documents the relevant values for Mac OS installations.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py2-none-any.whl
 
@@ -528,5 +528,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py2-none-any.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.5.0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index bc7d2080dc..36dffd85dc 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -359,10 +359,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.5.0 on Linux: +for TensorFlow 1.6.0rc0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.5.0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0rc0-py2-none-any.whl
 
## Validate your installation @@ -460,7 +460,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux** - + + @@ -478,6 +479,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.6.0rc0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.0N/AN/A
tensorflow_gpu-1.6.0rc0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.5.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.0N/AN/A
tensorflow_gpu-1.5.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.079
tensorflow-1.4.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.5.4N/AN/A
+ @@ -491,6 +493,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.6.0rc0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.5.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
tensorflow-1.3.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
+ + diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index 5dc4a053fd..d16761c367 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -70,7 +70,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.5 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.6 --depth=1 https://github.com/tensorflow/tensorflow.git . # TODO(craigcitro): Don't install the pip package, since it makes it # more difficult to experiment with local changes. Instead, just add diff --git a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl index 96b260ad3a..3690e7dfe5 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl +++ b/tensorflow/tools/docker/Dockerfile.devel-cpu-mkl @@ -3,7 +3,7 @@ FROM tensorflow/tensorflow:latest-devel LABEL maintainer="Clayne Robison" # These arguments are parameterized. Use --build-args to override. -ARG TF_BRANCH=r1.5 +ARG TF_BRANCH=r1.6 ARG WHL_DIR=/whl RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 07ffd3839a..4ef37881bc 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -79,7 +79,7 @@ RUN mkdir /bazel && \ # Download and build TensorFlow. WORKDIR /tensorflow -RUN git clone --branch=r1.5 --depth=1 https://github.com/tensorflow/tensorflow.git . +RUN git clone --branch=r1.6 --depth=1 https://github.com/tensorflow/tensorflow.git . # Configure the build for our CUDA configuration. 
ENV CI_BUILD_PYTHON python diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index d7fab2b93a..2002786999 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.5.0' +_VERSION = '1.6.0-rc0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From a0e31f28df22030b4ce33db49e999f56622aa693 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Fri, 2 Feb 2018 11:02:34 -0800 Subject: [PATCH 0027/2939] Remove all_files rule from com_google_absl.BUILD --- third_party/com_google_absl.BUILD | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/third_party/com_google_absl.BUILD b/third_party/com_google_absl.BUILD index 0c8d327c1f..8fca145f75 100644 --- a/third_party/com_google_absl.BUILD +++ b/third_party/com_google_absl.BUILD @@ -3,15 +3,3 @@ package(default_visibility = ["//visibility:public"]) licenses(["notice"]) # Apache exports_files(["LICENSE"]) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility = ["//tensorflow:__subpackages__"], -) -- GitLab From 0fd0f2ded169c383a564ffbe9564b6c3d5a684f3 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Fri, 2 Feb 2018 12:42:04 -0800 Subject: [PATCH 0028/2939] Fix sanity --- tensorflow/tools/ci_build/ci_sanity.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/ci_sanity.sh b/tensorflow/tools/ci_build/ci_sanity.sh index b3a8ff2ac7..fd5d005844 100755 --- a/tensorflow/tools/ci_build/ci_sanity.sh +++ b/tensorflow/tools/ci_build/ci_sanity.sh @@ -351,7 +351,7 @@ do_external_licenses_check(){ # Whitelist echo ${EXTRA_LICENSE_FILE} - grep -e "@bazel_tools//src/" -e "@bazel_tools//tools/" -e 
"@com_google_absl//" -e "//external" -e "@local" -v ${EXTRA_LICENSES_FILE} > temp.txt + grep -e "@bazel_tools//src" -e "@bazel_tools//tools/" -e "@com_google_absl//" -e "//external" -e "@local" -v ${EXTRA_LICENSES_FILE} > temp.txt mv temp.txt ${EXTRA_LICENSES_FILE} -- GitLab From 31bfdac6fa5cdf25ebcbf297f54bcaa042f1ec14 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 2 Feb 2018 13:00:44 -0800 Subject: [PATCH 0029/2939] Revert "Update external protobuf codebase version for Windows cmake build" This reverts commit 07bec47ba5db4c2f2e33ecb49f23253a371bfbbe. --- tensorflow/contrib/cmake/external/protobuf.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index fd05fa6d47..aedb793d2a 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG 396336eb961b75f03b25824fe86cf6490fb75e3a) +set(PROTOBUF_TAG b04e5cba356212e4e8c66c61bbe0c3a20537c5b9) if(WIN32) set(protobuf_STATIC_LIBRARIES -- GitLab From e8341fe47417fece104f24fc7067c105c507aa95 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 2 Feb 2018 13:03:49 -0800 Subject: [PATCH 0030/2939] Fixing the typo. 
--- tensorflow/core/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index a8a8c34846..dba9f6f0e0 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1909,7 +1909,7 @@ cc_library( tf_cuda_library( name = "cuda_device_functions", hdrs = ["util/cuda_device_functions.h"], - cuda_deps = ["//third_party_gpus/cuda:cuda_headers"], + cuda_deps = ["//third_party/gpus/cuda:cuda_headers"], visibility = ["//visibility:public"], deps = [":framework_lite"], ) -- GitLab From b300c52026ba152e62fc6d7e7e8c2cc515677212 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 2 Feb 2018 13:46:48 -0800 Subject: [PATCH 0031/2939] Fixing the path post transition to GitHub. --- tensorflow/core/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index dba9f6f0e0..5b38c10032 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1909,7 +1909,7 @@ cc_library( tf_cuda_library( name = "cuda_device_functions", hdrs = ["util/cuda_device_functions.h"], - cuda_deps = ["//third_party/gpus/cuda:cuda_headers"], + cuda_deps = ["@local_config_cuda//cuda:cuda_headers"], visibility = ["//visibility:public"], deps = [":framework_lite"], ) -- GitLab From f42bf6706d12369c7686bae1ecac8f6957daa78a Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 2 Feb 2018 13:52:30 -0800 Subject: [PATCH 0032/2939] Revert "Update deprecated API use" This reverts commit ccedcbe14c798fb3b227030cf85b4fe89406f0d8. 
--- tensorflow/core/distributed_runtime/tensor_coding.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/distributed_runtime/tensor_coding.cc b/tensorflow/core/distributed_runtime/tensor_coding.cc index 34a4013547..fe2d1a1293 100644 --- a/tensorflow/core/distributed_runtime/tensor_coding.cc +++ b/tensorflow/core/distributed_runtime/tensor_coding.cc @@ -81,7 +81,7 @@ void TensorResponse::InitPartial(const RecvTensorResponse& response) { Status TensorResponse::ParseFrom(Source* source) { if (!on_host_) { protobuf::io::CodedInputStream input(source->contents()); - input.SetTotalBytesLimit(INT_MAX); // Unlimited + input.SetTotalBytesLimit(INT_MAX, INT_MAX); // Unlimited // Pre-parse into local storage, then delegate to device. if (!meta_.ParseFromCodedStream(&input) || !input.ConsumedEntireMessage()) { @@ -217,7 +217,7 @@ bool TensorResponse::ParseTensorSubmessage( bool TensorResponse::ParseFast(Source* source) { protobuf::io::CodedInputStream input(source->contents()); - input.SetTotalBytesLimit(INT_MAX); // Unlimited + input.SetTotalBytesLimit(INT_MAX, INT_MAX); // Unlimited while (true) { auto p = input.ReadTagWithCutoff(127); int tag = GetTagFieldNumber(p.first); -- GitLab From 782ccd93198f0187e4cb52aad489b49c6960d074 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Fri, 2 Feb 2018 13:57:11 -0800 Subject: [PATCH 0033/2939] Add k8 to detection for when to use neon_tensor_utils. 
--- tensorflow/contrib/lite/kernels/internal/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index 38b032c6de..2c29b2abb8 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -330,6 +330,9 @@ cc_library( ":x86": [ ":neon_tensor_utils", ], + ":k8": [ + ":neon_tensor_utils", + ], ":darwin": [ ":neon_tensor_utils", ], -- GitLab From 075482c55bd8fa87eeee0f430ddf38d081cf16ec Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Fri, 2 Feb 2018 14:08:30 -0800 Subject: [PATCH 0034/2939] Update BUILD --- tensorflow/core/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 5b38c10032..45edbe9652 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1909,7 +1909,6 @@ cc_library( tf_cuda_library( name = "cuda_device_functions", hdrs = ["util/cuda_device_functions.h"], - cuda_deps = ["@local_config_cuda//cuda:cuda_headers"], visibility = ["//visibility:public"], deps = [":framework_lite"], ) -- GitLab From a8efd478702bbd4503f8afa9559b86a54d8544eb Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Fri, 2 Feb 2018 15:00:17 -0800 Subject: [PATCH 0035/2939] Adds batch inference support on TPU with TPUEstimator.predict. 
PiperOrigin-RevId: 184339842 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 627 +++++++++++++++--- 1 file changed, 541 insertions(+), 86 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index c7008533f3..b5082fc823 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -47,6 +47,7 @@ from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.estimator import util from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -170,10 +171,12 @@ class _TPUContext(object): ``` """ - def __init__(self, config, train_batch_size, eval_batch_size, use_tpu): + def __init__(self, config, train_batch_size, eval_batch_size, + predict_batch_size, use_tpu): self._config = config self._train_batch_size = train_batch_size self._eval_batch_size = eval_batch_size + self._predict_batch_size = predict_batch_size self._use_tpu = use_tpu self._num_shards_or_none = self._config.tpu_config.num_shards self._mode = None @@ -218,31 +221,66 @@ class _TPUContext(object): return (self._mode == model_fn_lib.ModeKeys.TRAIN and not self._config.tpu_config.per_host_input_for_training) - def is_running_on_cpu(self): - """Determines whether the input_fn and model_fn should be invoked on CPU.""" + def is_running_on_cpu(self, is_export_mode=False): + """Determines whether the input_fn and model_fn should be invoked on CPU. + + Args: + is_export_mode: Indicates whether the current mode is for exporting the + model, when mode == PREDICT. Only with this bool, we could + tell whether user is calling the Estimator.predict or + Estimator.export_savedmodel, which are running on TPU and CPU + respectively. 
Parent class Estimator does not distinguish these two. + + Returns: + bool, whether current input_fn or model_fn should be running on CPU. + + Raises: + ValueError: any configuration is invalid. + """ mode = self._assert_mode() - return (not self._use_tpu) or mode == model_fn_lib.ModeKeys.PREDICT + + if not self._use_tpu: + return True + + if mode != model_fn_lib.ModeKeys.PREDICT: + return False + + # There are actually 2 use cases when running with mode.PREDICT: prediction + # and saving the model. We run actual predictions on the TPU, but + # model export is run on the CPU. + if is_export_mode: + return True + + if self._predict_batch_size is None: + raise ValueError( + 'predict_batch_size in TPUEstimator constructor should not be ' + '`None` if .predict is running on TPU.') + if self.num_hosts > 1: + raise ValueError( + 'TPUEstimator.predict should be running on single host.') + + return False @property def global_batch_size(self): mode = self._assert_mode() - return (self._train_batch_size - if mode == model_fn_lib.ModeKeys.TRAIN else self._eval_batch_size) + if mode == model_fn_lib.ModeKeys.TRAIN: + return self._train_batch_size + elif mode == model_fn_lib.ModeKeys.EVAL: + return self._eval_batch_size + elif mode == model_fn_lib.ModeKeys.PREDICT: + return self._predict_batch_size + else: + return None @property def batch_size_for_input_fn(self): """Returns the shard batch size for `input_fn`.""" - mode = self._assert_mode() + global_batch_size = self.global_batch_size + if self.is_running_on_cpu(): - if mode == model_fn_lib.ModeKeys.TRAIN: - return self._train_batch_size - if mode == model_fn_lib.ModeKeys.EVAL: - return self._eval_batch_size - return None + return global_batch_size - global_batch_size = ( - self._train_batch_size - if mode == model_fn_lib.ModeKeys.TRAIN else self._eval_batch_size) # On TPU if self.is_input_sharded_per_core(): return global_batch_size // self.num_cores @@ -252,19 +290,13 @@ class _TPUContext(object): @property def 
batch_size_for_model_fn(self): """Returns the shard batch size for `model_fn`.""" - mode = self._assert_mode() + global_batch_size = self.global_batch_size + if self.is_running_on_cpu(): - if mode == model_fn_lib.ModeKeys.TRAIN: - return self._train_batch_size - if mode == model_fn_lib.ModeKeys.EVAL: - return self._eval_batch_size - return None + return global_batch_size # On TPU. always sharded per core. - if mode == model_fn_lib.ModeKeys.TRAIN: - return self._train_batch_size // self.num_cores - else: - return self._eval_batch_size // self.num_cores + return global_batch_size // self.num_cores @property def master_job(self): @@ -506,6 +538,22 @@ class _OpQueueContext(object): self._thread.join() +class _OpSignalOnceQueueContext(_OpQueueContext): + """Manages work queue and thread for a infeed/outfeed thread. + + This subclass only signals once. + """ + + def __init__(self, name, target, args): + super(_OpSignalOnceQueueContext, self).__init__(name, target, args) + self._has_signaled = False + + def send_next_batch_signal(self, iterations): + if not self._has_signaled: + self._queue.put(iterations) + self._has_signaled = True + + class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): """A Session hook setting up the TPU initialization, infeed, and outfeed. 
@@ -633,13 +681,16 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): except Exception as e: # pylint: disable=broad-except self._log_error(session, e) + def _create_infeed_controller(self, name, target, args): + return _OpQueueContext(name=name, target=target, args=args) + def after_create_session(self, session, coord): logging.info('Init TPU system') session.run(self._init_ops, options=config_pb2.RunOptions(timeout_in_ms=5 * 60 * 1000)) logging.info('Start infeed thread controller') - self._infeed_controller = _OpQueueContext( + self._infeed_controller = self._create_infeed_controller( name='InfeedController', target=self._run_infeed, args=(session,)) logging.info('Start outfeed thread controller') @@ -677,6 +728,16 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): session.run(self._finalize_ops) +class TPUInfeedOutfeedSessionHookForPrediction(TPUInfeedOutfeedSessionHook): + + def __init__(self, ctx, enqueue_ops, dequeue_ops): + super(TPUInfeedOutfeedSessionHookForPrediction, self).__init__( + ctx, enqueue_ops, dequeue_ops, run_infeed_loop_on_coordinator=False) + + def _create_infeed_controller(self, name, target, args): + return _OpSignalOnceQueueContext(name=name, target=target, args=args) + + class _TPUStopAtStepHook(session_run_hook.SessionRunHook): """Hook that requests stop at a specified step. 
@@ -764,6 +825,47 @@ class _SetEvalIterationsHook(session_run_hook.SessionRunHook): self._iterations_per_loop_var.load(self._num_steps, session=session) +class _StoppingPredictHook(session_run_hook.SessionRunHook): + """Hook that requests stop according to the stopping signal in prediction.""" + + def __init__(self, scalar_stopping_signal): + self._scalar_stopping_signal = scalar_stopping_signal + + def begin(self): + self._iterations_per_loop_var = _create_or_get_iterations_per_loop() + + def after_create_session(self, session, coord): + # This is not necessary as we do not run infeed enqueue and outfeed dequeue + # in side threads for prediction model. But it makes the + # TPUInfeedOutfeedSessionHook prints nice message. + self._iterations_per_loop_var.load(1, session=session) + + def before_run(self, run_context): + return session_run_hook.SessionRunArgs(self._scalar_stopping_signal) + + def after_run(self, run_context, run_values): + _ = run_context + scalar_stopping_signal = run_values.results + if _StopSignals.should_stop(scalar_stopping_signal): + # NOTE(xiejw): In prediction, stopping signals are inserted for each + # batch. And we append one more batch to signal the system it should stop. + # The data flow might look like + # + # batch 0: images, labels, stop = 0 (user provideded) + # batch 1: images, labels, stop = 0 (user provideded) + # ... + # batch 99: images, labels, stop = 0 (user provideded) + # batch 100: images, labels, stop = 1 (TPUEstimator appended) + # + # where the final batch (id = 100) is appended by TPUEstimator, so we + # should drop it before returning the predictions to user. + # To achieve that, we throw the OutOfRangeError in after_run. 
Once + # Monitored Session sees this error in SessionRunHook.after_run, the + # "current" prediciton, i.e., batch with id=100, will be discarded + # immediately + raise errors.OutOfRangeError(None, None, 'Stopped by stopping signal.') + + def generate_per_core_enqueue_ops_fn_for_host(ctx, input_fn, inputs_structure_recorder): """Generates infeed enqueue ops for per-core input_fn on a single host.""" @@ -815,6 +917,14 @@ def generate_per_host_enqueue_ops_fn_for_host( inputs = _Inputs.from_input_fn(input_fn()) is_dataset = inputs.is_dataset + if ctx.mode == model_fn_lib.ModeKeys.PREDICT: + if not is_dataset: + raise TypeError( + 'For mode PREDICT, `input_fn` must return `Dataset` instead of ' + '`features` and `labels`.') + inputs = _InputsWithStoppingSignals( + dataset=inputs.dataset, batch_size=ctx.batch_size_for_input_fn) + if is_dataset: hooks.append(inputs.dataset_initializer_hook()) @@ -825,11 +935,13 @@ def generate_per_host_enqueue_ops_fn_for_host( # dataset, it is initialized and the features and labels extracted via # `dataset.iterator.get_next()` features, labels = inputs.features_and_labels() + signals = inputs.signals() - inputs_structure_recorder.validate_and_record_structure(features, labels) + inputs_structure_recorder.validate_and_record_structure( + features, labels, signals) unsharded_tensor_list = ( inputs_structure_recorder.flatten_features_and_labels( - features, labels)) + features, labels, signals)) infeed_queue = tpu_feed.InfeedQueue( tuple_types=[t.dtype for t in unsharded_tensor_list], @@ -841,7 +953,13 @@ def generate_per_host_enqueue_ops_fn_for_host( per_host_enqueue_ops = ( infeed_queue.split_inputs_and_generate_enqueue_ops( unsharded_tensor_list, placement_function=lambda x: device)) - return per_host_enqueue_ops + if signals is None: + return per_host_enqueue_ops + else: + return { + 'ops': per_host_enqueue_ops, + 'signals': signals, + } return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset @@ -883,6 +1001,7 @@ class 
_InputPipeline(object): self._feature_names = [] self._label_names = [] self._has_labels = False + self._signals_helper = None # Internal state. self._initialized = False @@ -890,7 +1009,7 @@ class _InputPipeline(object): def has_labels(self): return self._has_labels - def validate_and_record_structure(self, features, labels): + def validate_and_record_structure(self, features, labels, signals=None): """Validates and records the structure of features` and `labels`.""" def _extract_key_names(tensor_or_dict): @@ -903,6 +1022,10 @@ class _InputPipeline(object): feature_names = _extract_key_names(features) label_names = _extract_key_names(labels) + if signals is not None and self._signals_helper is None: + # Record signals helper. + self._signals_helper = _SignalsHelper(signals) + if self._initialized: # Verify the structure is same. The following should never happen. assert feature_names == self._feature_names, 'feature keys mismatched' @@ -915,7 +1038,7 @@ class _InputPipeline(object): self._label_names = label_names self._has_labels = has_labels - def flatten_features_and_labels(self, features, labels): + def flatten_features_and_labels(self, features, labels, signals=None): """Flattens the `features` and `labels` to a single tensor list.""" flattened_inputs = [] if self._feature_names: @@ -931,6 +1054,9 @@ class _InputPipeline(object): flattened_inputs.extend([labels[name] for name in self._label_names]) else: flattened_inputs.append(labels) + + if signals is not None: + flattened_inputs.extend(_SignalsHelper.as_tensor_list(signals)) return flattened_inputs def unflatten_features_and_labels(self, flattened_inputs): @@ -956,7 +1082,11 @@ class _InputPipeline(object): else: expected_num_labels = 0 - expected_num_tensors = expected_num_features + expected_num_labels + expected_num_signals = ( + self._signals_helper.num_signals if self._signals_helper else 0) + + expected_num_tensors = ( + expected_num_features + expected_num_labels + expected_num_signals) if 
expected_num_tensors != len(flattened_inputs): raise ValueError( @@ -973,13 +1103,20 @@ class _InputPipeline(object): if expected_num_labels == 0: unflattened_label = None elif self._label_names: - unflattened_label = dict( - zip(self._label_names, flattened_inputs[expected_num_features:])) + label_list = flattened_inputs[ + expected_num_features:expected_num_features + expected_num_labels] + unflattened_label = dict(zip(self._label_names, label_list)) else: # Single tensor case. unflattened_label = flattened_inputs[expected_num_features] - return _Inputs(unflattened_features, unflattened_label) + signals = None + if expected_num_signals != 0: + tensor_list_for_signals = flattened_inputs[ + expected_num_features + expected_num_labels:] + signals = self._signals_helper.unflatten(tensor_list_for_signals) + + return _Inputs(unflattened_features, unflattened_label, signals=signals) def __init__(self, input_fn, batch_axis, ctx): """Constructor. @@ -1078,9 +1215,12 @@ class _InputPipeline(object): # slow compared to the previous case. if is_dataset: run_infeed_loop_on_coordinator = False + wrap_fn = ( + _wrap_computation_in_while_loop + if self._ctx.mode != model_fn_lib.ModeKeys.PREDICT else + _wrap_computation_in_while_loop_with_stopping_signals) enqueue_ops.append( - _wrap_computation_in_while_loop( - device=host_device, op_fn=enqueue_ops_fn)) + wrap_fn(device=host_device, op_fn=enqueue_ops_fn)) else: enqueue_ops.append(enqueue_ops_fn()) infeed_queues.append(captured_infeed_queue.get()) @@ -1145,7 +1285,8 @@ class _ModelFnWrapper(object): infeed dequeue channel. Returns: - A Fn representing the train step for TPU. + A tuple of train_fn, host_calls, and captured scaffold_fn. The train_fn + representing the train step for TPU. """ host_call = _OutfeedHostCall(self._ctx) @@ -1198,8 +1339,8 @@ class _ModelFnWrapper(object): infeed dequeue channel. Returns: - A tuple of eval_fn and eval_metrics. The eval_fn representing the eval - step for TPU. 
and eval_metrics is an `_OutfeedHostCall` instance. + A tuple of eval_fn, host_calls, and captured scaffold_fn. The eval_fn + representing the eval step for TPU. """ host_calls = _OutfeedHostCall(self._ctx) captured_scaffold_fn = _CapturedObject() @@ -1228,7 +1369,55 @@ class _ModelFnWrapper(object): return eval_step, host_calls, captured_scaffold_fn - def _call_model_fn(self, features, labels): + def convert_to_single_tpu_predict_step(self, dequeue_fn): + """Converts user provided model_fn` as a single predict step on TPU. + + Args: + dequeue_fn: The function to retrieve inputs, features and labels, from TPU + infeed dequeue channel. + + Returns: + A tuple of predict_fn, host_calls, and captured scaffold_fn. The + predict_fn representing the predict step for TPU. + """ + host_calls = _OutfeedHostCall(self._ctx) + captured_scaffold_fn = _CapturedObject() + + def predict_step(unused_scalar_stopping_signal): + """Evaluation step function for use inside a while loop.""" + inputs = dequeue_fn() + features, labels = inputs.features_and_labels() + stopping_signals = inputs.signals() + + assert stopping_signals is not None, ( + 'Internal Error: `signals` is missing.') + + tpu_estimator_spec = self._call_model_fn( + features, labels, is_export_mode=False) + if not isinstance(tpu_estimator_spec, TPUEstimatorSpec): + raise RuntimeError( + 'estimator_spec used by TPU prediction must have type' + '`TPUEstimatorSpec`. Got {}'.format(type(tpu_estimator_spec))) + + captured_scaffold_fn.capture(tpu_estimator_spec.scaffold_fn) + to_record = {} + identity_fn = lambda **kwargs: kwargs + # TODO(xiejw): Adds validation for prediction dictionrary. + # TODO(xiejw): Adds support for single tensor as predictions. 
+ if not isinstance(tpu_estimator_spec.predictions, dict): + raise TypeError('TPUEstimatorSpec.predictions must be dict of Tensors.') + to_record['predictions'] = [identity_fn, tpu_estimator_spec.predictions] + to_record['signals'] = [identity_fn, stopping_signals] + if tpu_estimator_spec.host_call is not None: + to_record['host_call'] = tpu_estimator_spec.host_call + host_calls.record(to_record) + + with ops.control_dependencies(host_calls.create_enqueue_op()): + return _StopSignals.as_scalar_stopping_signal(stopping_signals) + + return predict_step, host_calls, captured_scaffold_fn + + def _call_model_fn(self, features, labels, is_export_mode=True): """Calls the model_fn with required parameters.""" model_fn_args = util.fn_args(self._model_fn) kwargs = {} @@ -1259,7 +1448,7 @@ class _ModelFnWrapper(object): params[_BATCH_SIZE_KEY] = batch_size_for_model_fn estimator_spec = self._model_fn(features=features, **kwargs) - if (self._ctx.is_running_on_cpu() and + if (self._ctx.is_running_on_cpu(is_export_mode) and isinstance(estimator_spec, TPUEstimatorSpec)): # The estimator_spec will be passed to `Estimator` directly, which expects # type `EstimatorSpec`. @@ -1614,6 +1803,7 @@ class TPUEstimator(estimator_lib.Estimator): use_tpu=True, train_batch_size=None, eval_batch_size=None, + predict_batch_size=None, batch_axis=None): """Constructs an `TPUEstimator` instance. @@ -1639,7 +1829,9 @@ class TPUEstimator(estimator_lib.Estimator): size, as params['batch_size'], when calling `input_fn` and `model_fn`. Cannot be `None` if `use_tpu` is `True`. Must be divisible by `config.tpu_config.num_shards`. - eval_batch_size: An int representing the global training batch size. + eval_batch_size: An int representing evaluation batch size. + Must be divisible by `config.tpu_config.num_shards`. + predict_batch_size: An int representing the prediction batch size. Must be divisible by `config.tpu_config.num_shards`. 
batch_axis: A python tuple of int values describing how each tensor produced by the Estimator `input_fn` should be split across the TPU @@ -1689,6 +1881,16 @@ class TPUEstimator(estimator_lib.Estimator): 'eval batch size {} must be divisible by number of shards {}' .format(eval_batch_size, config.tpu_config.num_shards)) + if predict_batch_size is not None: + if not isinstance(predict_batch_size, int): + raise ValueError('`predict_batch_size` must be an int') + if predict_batch_size < 1: + raise ValueError('`predict_batch_size` must be positive') + if predict_batch_size % config.tpu_config.num_shards != 0: + raise ValueError( + 'predict batch size {} must be divisible by number of shards {}' + .format(predict_batch_size, config.tpu_config.num_shards)) + # Verifies the model_fn signature according to Estimator framework. estimator_lib._verify_model_fn_args(model_fn, params) # pylint: disable=protected-access # We cannot store config and params in this constructor as parent @@ -1708,7 +1910,7 @@ class TPUEstimator(estimator_lib.Estimator): # All properties passed to _TPUContext are immutable. self._ctx = _TPUContext(self._config, train_batch_size, eval_batch_size, - use_tpu) + predict_batch_size, use_tpu) def _create_global_step(self, graph): """Creates a global step suitable for TPUs. @@ -1804,7 +2006,9 @@ class TPUEstimator(estimator_lib.Estimator): if batch_size_for_input_fn is not None: kwargs['params'][_BATCH_SIZE_KEY] = batch_size_for_input_fn - if ctx.is_running_on_cpu(): + # For export_savedmodel, input_fn is never passed to Estimator. So, + # `is_export_mode` must be False. + if ctx.is_running_on_cpu(is_export_mode=False): with ops.device('/device:CPU:0'): return input_fn(**kwargs) @@ -1831,8 +2035,13 @@ class TPUEstimator(estimator_lib.Estimator): with self._ctx.with_mode(mode) as ctx: model_fn_wrapper = _ModelFnWrapper(model_fn, config, params, ctx) - # TODO(jhseu): Move to PREDICT to TPU. 
- if ctx.is_running_on_cpu(): + # For export_savedmodel, input_fn is never passed to Estimator. So, + # if features is callable, it means it is the input_fn passed by + # TPUEstimator._call_input_fn. Then we can know if the mode == PREDICT, + # it implies, it is the .predict API, not export_savedmodel API. + is_export_mode = not callable(features) + + if ctx.is_running_on_cpu(is_export_mode=is_export_mode): logging.info('Running %s on CPU', mode) return model_fn_wrapper.call_without_tpu(features, labels) @@ -1881,53 +2090,114 @@ class TPUEstimator(estimator_lib.Estimator): train_op=control_flow_ops.group(*update_ops), scaffold=scaffold) - # Now eval. - total_loss, host_calls, scaffold = _eval_on_tpu_system( + if mode == model_fn_lib.ModeKeys.EVAL: + total_loss, host_calls, scaffold = _eval_on_tpu_system( + ctx, model_fn_wrapper, dequeue_fn) + iterations_per_loop_var = _create_or_get_iterations_per_loop() + mean_loss = math_ops.div(total_loss, + math_ops.cast( + iterations_per_loop_var, + dtype=total_loss.dtype)) + + # Creates a dummy metric update_op for all metrics. Estimator expects + # all metrics in eval_metric_ops have update_op and calls them one by + # one. The real metric update_ops are invoked in a separated thread. + # So, here give Estimator the dummy op for all metrics. 
+ with ops.control_dependencies([mean_loss]): + # After TPU evaluation computation is done (the mean_loss tensor), + # reads all variables back from TPU and updates the eval step + # counter properly + internal_ops_to_run = _sync_variables_ops() + internal_ops_to_run.append( + _increase_eval_step_op(iterations_per_loop_var)) + with ops.control_dependencies(internal_ops_to_run): + dummy_update_op = control_flow_ops.no_op() + + host_call_ret = host_calls.create_tpu_hostcall() + eval_metric_ops = {} + eval_update_ops = [] + for k, v in host_call_ret['eval_metrics'].items(): + eval_metric_ops[k] = (v[0], dummy_update_op) + eval_update_ops.append(v[1]) + + if 'host_call' not in host_call_ret: + host_ops = [] + else: + host_ops = host_call_ret['host_call'] + hooks = [ + TPUInfeedOutfeedSessionHook( + ctx, + enqueue_ops, + eval_update_ops + host_ops, + run_infeed_loop_on_coordinator=( + run_infeed_loop_on_coordinator)), + ] + input_hooks + + return model_fn_lib.EstimatorSpec( + mode, + loss=mean_loss, + evaluation_hooks=hooks, + eval_metric_ops=eval_metric_ops, + scaffold=scaffold) + + # Predict + assert mode == model_fn_lib.ModeKeys.PREDICT + + dummy_predict_op, host_calls, scaffold = _predict_on_tpu_system( ctx, model_fn_wrapper, dequeue_fn) - iterations_per_loop_var = _create_or_get_iterations_per_loop() - mean_loss = math_ops.div(total_loss, - math_ops.cast( - iterations_per_loop_var, - dtype=total_loss.dtype)) - - # Creates a dummy metric update_op for all metrics. Estimator expects - # all metrics in eval_metric_ops have update_op and calls them one by - # one. The real metric update_ops are invoked in a separated thread. So, - # here give Estimator the dummy op for all metrics. 
- with ops.control_dependencies([mean_loss]): - # After TPU evaluation computation is done (the mean_loss tensor), - # reads all variables back from TPU and updates the eval step counter - # properly + with ops.control_dependencies([dummy_predict_op]): internal_ops_to_run = _sync_variables_ops() - internal_ops_to_run.append( - _increase_eval_step_op(iterations_per_loop_var)) with ops.control_dependencies(internal_ops_to_run): - dummy_update_op = control_flow_ops.no_op() + dummy_predict_op = control_flow_ops.no_op() + + # In train and evaluation, the main TPU program is passed to monitored + # training session to run. Infeed enqueue and outfeed dequeue are + # executed in side threads. This is not the configuration for + # prediction mode. + # + # For prediction, the Estimator executes the EstimatorSpec.predictions + # directly and yield the element (via generator) to call site. So, the + # outfeed based prediction must be passed to MonitoredSession directly. + # Other parts of the TPU execution are organized as follows. + # + # 1. All outfeed based Tensors must be grouped with predictions Tensors + # to form a single invocation. This avoid the issue we might trigger + # multiple outfeeds incorrectly. To achieve this, `host_call` is + # placed in control_dependencies of `stopping_signals`, and + # `stopping_signals` is passed into _StoppingPredictHook, which sets + # the `stopping_signals` as SessionRunArgs. MonitoredSession merges + # all SessionRunArgs with the fetch in session.run together. + # + # 2. The TPU program (dummy_predict_op) and enqueue_ops (infeed Enqueue) + # are grouped together. They will be launched once and only once in + # side threads and they quit naturally according to the SAME stopping + # condition. 
+ enqueue_ops.append(dummy_predict_op) host_call_ret = host_calls.create_tpu_hostcall() - eval_metric_ops = {} - eval_update_ops = [] - for k, v in host_call_ret['eval_metrics'].items(): - eval_metric_ops[k] = (v[0], dummy_update_op) - eval_update_ops.append(v[1]) - if 'host_call' not in host_call_ret: host_ops = [] else: host_ops = host_call_ret['host_call'] + + predictions = host_call_ret['predictions'] + stopping_signals = host_call_ret['signals'] + + with ops.control_dependencies(host_ops): + host_ops = [] # Empty, we do do not need it anymore. + scalar_stopping_signal = _StopSignals.as_scalar_stopping_signal( + stopping_signals) + hooks = [ - TPUInfeedOutfeedSessionHook( - ctx, - enqueue_ops, - eval_update_ops + host_ops, - run_infeed_loop_on_coordinator=run_infeed_loop_on_coordinator), + _StoppingPredictHook(scalar_stopping_signal), + TPUInfeedOutfeedSessionHookForPrediction(ctx, enqueue_ops, + host_ops), ] + input_hooks return model_fn_lib.EstimatorSpec( mode, - loss=mean_loss, - evaluation_hooks=hooks, - eval_metric_ops=eval_metric_ops, + prediction_hooks=hooks, + predictions=predictions, scaffold=scaffold) return _model_fn @@ -1981,6 +2251,34 @@ def _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn): return loss, host_call, scaffold +def _predict_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn): + """Executes `model_fn_wrapper` multiple times on all TPU shards.""" + num_cores = ctx.num_cores + + single_tpu_predict_step, host_calls, captured_scaffold_fn = ( + model_fn_wrapper.convert_to_single_tpu_predict_step(dequeue_fn)) + + def multi_tpu_predict_steps_on_single_shard(): + + def cond(scalar_stopping_signal): + return math_ops.logical_not( + _StopSignals.should_stop(scalar_stopping_signal)) + + inputs = [_StopSignals.NON_STOPPING_SIGNAL] + outputs = training_loop.while_loop( + cond, single_tpu_predict_step, inputs=inputs, name=b'loop') + return outputs + + (dummy_predict_op,) = tpu.shard( + multi_tpu_predict_steps_on_single_shard, + inputs=[], + 
num_shards=num_cores, + outputs_from_all_shards=False) + + scaffold = _get_scaffold(captured_scaffold_fn) + return dummy_predict_op, host_calls, scaffold + + def _wrap_computation_in_while_loop(device, op_fn): """Wraps the ops generated by `op_fn` in tf.while_loop.""" @@ -1999,6 +2297,29 @@ def _wrap_computation_in_while_loop(device, op_fn): parallel_iterations=1) +def _wrap_computation_in_while_loop_with_stopping_signals(device, op_fn): + """Wraps the ops generated by `op_fn` in tf.while_loop.""" + + def cond(scalar_stopping_signal): + return math_ops.logical_not( + _StopSignals.should_stop(scalar_stopping_signal)) + + def computation(unused_scalar_stopping_signal): + return_value = op_fn() + execute_ops = return_value['ops'] + signals = return_value['signals'] + with ops.control_dependencies(execute_ops): + return _StopSignals.as_scalar_stopping_signal(signals) + + # By setting parallel_iterations=1, the parallel execution in while_loop is + # basically turned off. + with ops.device(device): + return control_flow_ops.while_loop( + cond, + computation, [_StopSignals.NON_STOPPING_SIGNAL], + parallel_iterations=1) + + def _validate_tpu_training_graph(): """Validate graph before running distributed training. @@ -2091,21 +2412,22 @@ class _CapturingContext(control_flow_ops.ControlFlowContext): self._g._set_control_flow_context(self._old) # pylint: disable=protected-access -# TODO(xiejw): Extend this to support internal signal. class _Inputs(object): """A data structure representing the input_fn returned values. This also supports the returned value from input_fn as `Dataset`. 
""" - def __init__(self, features=None, labels=None, dataset=None): - if dataset is not None and (features is not None or labels is not None): + def __init__(self, features=None, labels=None, dataset=None, signals=None): + if dataset is not None and (features is not None or labels is not None or + signals is not None): raise RuntimeError('Internal Error: Either (features and labels) or ' 'dataset should be provided, not both. Please file ' 'bug') self._features = features self._labels = labels + self._signals = signals self._dataset = dataset self._iterator = None @@ -2117,11 +2439,16 @@ class _Inputs(object): dataset = return_values return _Inputs(dataset=dataset) + features, labels = _Inputs._parse_inputs(return_values) + return _Inputs(features, labels) + + @staticmethod + def _parse_inputs(return_values): if isinstance(return_values, tuple): features, labels = return_values else: features, labels = return_values, None - return _Inputs(features, labels) + return features, labels @property def is_dataset(self): @@ -2142,7 +2469,135 @@ class _Inputs(object): def features_and_labels(self): """Gets `features` and `labels`.""" if self.is_dataset: - return (_Inputs.from_input_fn( - self._iterator.get_next()).features_and_labels()) + return _Inputs._parse_inputs(self._iterator.get_next()) return (self._features, self._labels) + + def signals(self): + return self._signals + + @property + def dataset(self): + return self._dataset + + +# TODO(xiejw): Extend this to support final partial batch. 
+class _InputsWithStoppingSignals(_Inputs): + """Inputs with `_StopSignals` inserted into the dataset.""" + + def __init__(self, dataset, batch_size): + + assert dataset is not None + + user_provided_dataset = dataset.map( + _InputsWithStoppingSignals.insert_stopping_signal( + stop=False, batch_size=batch_size)) + final_batch_dataset = dataset.take(1).map( + _InputsWithStoppingSignals.insert_stopping_signal( + stop=True, batch_size=batch_size)) + dataset = user_provided_dataset.concatenate(final_batch_dataset).prefetch(2) + + super(_InputsWithStoppingSignals, self).__init__(dataset=dataset) + self._current_inputs = None + + def features_and_labels(self): + if self._current_inputs is not None: + raise RuntimeError( + 'Internal Error: The previous inputs have not been properly ' + 'consumed. First call features_and_labels, then call signals.') + + inputs_with_signals = self._iterator.get_next() + features = inputs_with_signals['features'] + labels = inputs_with_signals.get('labels') + + self._current_inputs = inputs_with_signals + return features, labels + + def signals(self): + """Returns the `Signals` from `_Inputs`.""" + if self._current_inputs is None: + raise RuntimeError( + 'Internal Error: The current inputs have not been properly ' + 'generated. First call features_and_labels, then call signals.') + signals = self._current_inputs['signals'] + self._current_inputs = None + return signals + + @staticmethod + def insert_stopping_signal(stop, batch_size): + """Inserts stopping_signal into dataset via _map_fn. + + Here we change the data structure in the dataset, such that the return value + is a dictionary now and `features`, `labels`, and `signals` are three + distinguished keys in that dict. This provides a better structure, which + eases the process to decompose the inputs (see `features_and_labels`). + + Args: + stop: bool, state of current stopping signals. + batch_size: int, batch size. + + Returns: + A map_fn passed to dataset.map API. 
+ """ + + def _map_fn(*args): + features, labels = _Inputs._parse_inputs(args) + new_input_dict = {} + new_input_dict['features'] = features + if labels is not None: + new_input_dict['labels'] = labels + new_input_dict['signals'] = _StopSignals( + stop=stop, batch_size=batch_size).as_dict() + return new_input_dict + + return _map_fn + + +class _StopSignals(object): + """Signals class holding all logic to handle TPU stopping condition.""" + + NON_STOPPING_SIGNAL = 0.0 + STOPPING_SIGNAL = 1.0 + + def __init__(self, stop, batch_size): + self._stop = stop + self._batch_size = batch_size + + def as_dict(self): + shape = [self._batch_size, 1] + dtype = dtypes.float32 + + if self._stop: + stopping = array_ops.ones(shape=shape, dtype=dtype) + else: + stopping = array_ops.zeros(shape=shape, dtype=dtype) + + return {'stopping': stopping} + + @staticmethod + def as_scalar_stopping_signal(signals): + return array_ops.identity(signals['stopping'][0][0]) + + @staticmethod + def should_stop(scalar_stopping_signal): + return scalar_stopping_signal >= _StopSignals.STOPPING_SIGNAL + + +class _SignalsHelper(object): + """A general helper class to handle common signals manipulation.""" + + def __init__(self, signals): + self._signal_keys = [] + for key in sorted(signals.iterkeys()): + self._signal_keys.append(key) + + @property + def num_signals(self): + return len(self._signal_keys) + + def unflatten(self, tensor_list): + return dict(zip(self._signal_keys, tensor_list)) + + @staticmethod + def as_tensor_list(signals): + return [signals[key] for key in sorted(signals.iterkeys())] -- GitLab From 7785a8ebf417d0e867a08cabf2d42bb9b29dcb98 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Fri, 2 Feb 2018 16:35:10 -0800 Subject: [PATCH 0036/2939] Update global_step by default if the user specifies a host_call. 
PiperOrigin-RevId: 184352399 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 50 +++++++++++++++++-- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index b5082fc823..56793f11d9 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -63,6 +63,7 @@ from tensorflow.python.training import evaluation from tensorflow.python.training import session_run_hook from tensorflow.python.training import training from tensorflow.python.training import training_util +from tensorflow.python.util import tf_inspect _INITIAL_LOSS = 1e7 _ZERO_LOSS = 0. @@ -484,7 +485,7 @@ class TPUEstimatorSpec( if self.eval_metrics is not None: host_calls['eval_metrics'] = self.eval_metrics if self.host_call is not None: - host_calls['host_call'] = self.host_call + host_calls['host_call'] = wrap_hostcall_with_global_step(self.host_call) host_call_ret = _OutfeedHostCall.create_cpu_hostcall(host_calls) eval_metric_ops = None if self.eval_metrics is not None: @@ -1306,7 +1307,9 @@ class _ModelFnWrapper(object): if isinstance(estimator_spec, TPUEstimatorSpec): captured_scaffold_fn.capture(estimator_spec.scaffold_fn) if estimator_spec.host_call is not None: - host_call.record({'host_call': estimator_spec.host_call}) + host_call.record({ + 'host_call': wrap_hostcall_with_global_step( + estimator_spec.host_call)}) host_call_outfeed_ops = host_call.create_enqueue_op() else: captured_scaffold_fn.capture(None) @@ -1361,6 +1364,8 @@ class _ModelFnWrapper(object): to_record = {} to_record['eval_metrics'] = tpu_estimator_spec.eval_metrics if tpu_estimator_spec.host_call is not None: + # We assume that evaluate won't update global step, so we don't wrap + # this host_call. 
to_record['host_call'] = tpu_estimator_spec.host_call host_calls.record(to_record) @@ -1503,11 +1508,14 @@ class _OutfeedHostCall(object): raise ValueError('{}[1] should be tuple or list, or dict.'.format(name)) if isinstance(host_call[1], (tuple, list)): + fullargspec = tf_inspect.getfullargspec(host_call[0]) fn_args = util.fn_args(host_call[0]) - if len(host_call[1]) != len(fn_args): + # wrapped_hostcall_with_global_step uses varargs, so we allow that. + if fullargspec.varargs is None and len(host_call[1]) != len(fn_args): raise RuntimeError( - 'In TPUEstimatorSpec.{}, length of tensors does not ' - 'match method args of metric_fn.'.format(name)) + 'In TPUEstimatorSpec.{}, length of tensors {} does not match ' + 'method args of the function, which takes {}.'.format( + name, len(host_call[1]), len(fn_args))) @staticmethod def create_cpu_hostcall(host_calls): @@ -1649,6 +1657,38 @@ class _OutfeedHostCall(object): return ret +def wrap_hostcall_with_global_step(hostcall): + """Wrap the hostcall so that we update the global step upon every call.""" + if hostcall is None: + return None + host_fn, tensors = hostcall + + def global_step_host_fn(_global_step, *args, **kwargs): # pylint: disable=invalid-name + # Note that we don't have any ordering here, so the graph may see a + # global_step that's off by 1. + state_ops.assign( + training.get_global_step(), + math_ops.cast(_global_step[0], dtypes.int64)) + return host_fn(*args, **kwargs) + # Give the global step tensor a batch dimension. Reshape is not supported for + # int64, so we cast it to int32. + # TODO(jhseu): Remove the cast once int64 is supported. 
+ global_step_tensor = array_ops.reshape( + math_ops.cast(training.get_global_step(), dtypes.int32), [1]) + if isinstance(tensors, dict): + outfeed_tensors = {'_global_step': global_step_tensor} + outfeed_tensors.update(tensors) + return global_step_host_fn, outfeed_tensors + else: + fn_args = util.fn_args(host_fn) + if len(tensors) != len(fn_args): + raise RuntimeError( + 'In TPUEstimatorSpec.host_call, length of tensors {} does not match ' + 'method args of the function, which takes {}.'.format( + len(tensors), len(fn_args))) + return global_step_host_fn, [global_step_tensor] + list(tensors) + + class _OutfeedHostCallHook(session_run_hook.SessionRunHook): """Hook to run host calls when use_tpu=False.""" -- GitLab From 359ab22b7972802e19fe7949ebd945a59998f549 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Sat, 3 Feb 2018 09:05:25 -0800 Subject: [PATCH 0037/2939] skyewm: Fix Python3 crazy SessionTest.testReentryWithCApi failure. --- tensorflow/python/client/session.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py index 6befeb846d..f3c4fecdc0 100644 --- a/tensorflow/python/client/session.py +++ b/tensorflow/python/client/session.py @@ -1539,8 +1539,22 @@ class Session(BaseSession): def __exit__(self, exec_type, exec_value, exec_tb): if exec_type is errors.OpError: logging.error('Session closing due to OpError: %s', (exec_value,)) - self._default_session_context_manager.__exit__(exec_type, exec_value, - exec_tb) + try: + self._default_session_context_manager.__exit__(exec_type, exec_value, + exec_tb) + except RuntimeError as error: + if error == exec_value: + # NOTE(skyewm): for some reason, in Python3, + # _default_session_context_manager.__exit__ will re-raise the "not + # re-entrant" exception raised in __enter__ above (note that if we're + # here, we're in the outer session context manager, since __exit__ is + # not called when __enter__ raises an 
exception). We still want to + # continue cleaning up this context manager before the exception is + # further propagated, so we ignore it here (note that it'll continue + # being propagated after this method completes). + pass + else: + raise self._default_graph_context_manager.__exit__(exec_type, exec_value, exec_tb) self._default_session_context_manager = None -- GitLab From b23908b41ebad5dbe86255c2f196641e42490b2f Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Sat, 3 Feb 2018 09:17:55 -0800 Subject: [PATCH 0038/2939] Disabling keras io_utils_test on Windows. --- tensorflow/contrib/cmake/tf_tests.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 2e79eadf7f..73edd616ea 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -310,6 +310,8 @@ if (tensorflow_BUILD_PYTHON_TESTS) "${tensorflow_source_dir}/tensorflow/python/kernel_tests/control_flow_util_test.py" # Flaky replicate_model_fn_test "${tensorflow_source_dir}/tensorflow/contrib/estimator/python/estimator/replicate_model_fn_test.py" # b/71901810 + # Broken io_utils_test + "${tensorflow_source_dir}/tensorflow/python/keras/_impl/keras/utils/io_utils_test.py" # b/72894325 ) endif() list(REMOVE_ITEM tf_test_src_py ${tf_test_src_py_exclude}) -- GitLab From 6842913e9b56baa88c11f188e29e13466be9ed86 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Sat, 3 Feb 2018 09:46:13 -0800 Subject: [PATCH 0039/2939] Delete device_functions.h include. --- tensorflow/core/util/cuda_device_functions.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/util/cuda_device_functions.h b/tensorflow/core/util/cuda_device_functions.h index f787687f66..d67663c669 100644 --- a/tensorflow/core/util/cuda_device_functions.h +++ b/tensorflow/core/util/cuda_device_functions.h @@ -29,7 +29,6 @@ limitations under the License. 
#include #include #include "cuda/include/cuda.h" -#include "cuda/include/device_functions.h" #include "tensorflow/core/platform/types.h" #if CUDA_VERSION >= 7050 -- GitLab From 7946a0b1d9998cc54cb952d538668883d3fd8181 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Sat, 3 Feb 2018 22:02:53 -0800 Subject: [PATCH 0040/2939] Fix the Windows GPU build #2 --- tensorflow/core/util/cuda_device_functions.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/core/util/cuda_device_functions.h b/tensorflow/core/util/cuda_device_functions.h index d67663c669..525bef16a0 100644 --- a/tensorflow/core/util/cuda_device_functions.h +++ b/tensorflow/core/util/cuda_device_functions.h @@ -31,10 +31,6 @@ limitations under the License. #include "cuda/include/cuda.h" #include "tensorflow/core/platform/types.h" -#if CUDA_VERSION >= 7050 -#include "cuda/include/cuda_fp16.h" -#endif // CUDA_VERSION >= 7050 - namespace tensorflow { namespace detail { -- GitLab From 232fec7e52e52053a0e04c8919e7ece5c654d2de Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Sun, 4 Feb 2018 09:28:59 -0800 Subject: [PATCH 0041/2939] Fix for_canonicalization_test --- tensorflow/contrib/py2tf/converters/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index 3853c60f99..fc6781d50e 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -131,6 +131,7 @@ py_test( py_test( name = "for_canonicalization_test", srcs = ["for_canonicalization_test.py"], + srcs_version = "PY2AND3", deps = [ ":test_lib", "//tensorflow/contrib/py2tf/pyct", -- GitLab From 28c52d14afb5a54930bcca0db60c9d5068a2c63e Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 5 Feb 2018 09:55:39 -0800 Subject: [PATCH 0042/2939] WIP: adding int8 calibration --- tensorflow/contrib/tensorrt/BUILD | 4 + .../contrib/tensorrt/convert/convert_graph.cc | 167 +++++++++---- 
.../contrib/tensorrt/convert/convert_graph.h | 3 +- .../contrib/tensorrt/convert/convert_nodes.cc | 230 ++++++++++++++++-- .../contrib/tensorrt/convert/convert_nodes.h | 38 ++- .../contrib/tensorrt/kernels/trt_calib_op.cc | 68 ++++++ .../contrib/tensorrt/kernels/trt_calib_op.h | 35 +++ .../contrib/tensorrt/kernels/trt_engine_op.cc | 6 +- .../contrib/tensorrt/ops/trt_calib_op.cc | 34 +++ .../contrib/tensorrt/python/trt_convert.py | 4 +- .../tensorrt/resources/TRTInt8Calibrator.cc | 2 +- .../contrib/tensorrt/resources/TRTResources.h | 35 ++- tensorflow/contrib/tensorrt/trt_conversion.i | 9 +- 13 files changed, 543 insertions(+), 92 deletions(-) create mode 100644 tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc create mode 100644 tensorflow/contrib/tensorrt/kernels/trt_calib_op.h create mode 100644 tensorflow/contrib/tensorrt/ops/trt_calib_op.cc diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index c10e85cffa..bcb8573045 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -60,9 +60,11 @@ tf_kernel_library( name = "trt_engine_op_kernel", srcs = [ "kernels/trt_engine_op.cc", + "kernels/trt_calib_op.cc", ], hdrs=[ "kernels/trt_engine_op.h", + "kernels/trt_calib_op.h", ], gpu_srcs = [ ], @@ -82,6 +84,7 @@ tf_kernel_library( tf_gen_op_libs( op_lib_names = [ "trt_engine_op", + "trt_calib_op", ], deps=[ "@local_config_tensorrt//:tensorrt", @@ -108,6 +111,7 @@ tf_gen_op_wrapper_py( name = "trt_engine_op", deps = [ ":trt_engine_op_op_lib", + ":trt_calib_op_op_lib", ":trt_shape_function", ], ) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 16d6e6ec7d..d14abf14dd 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -40,9 +40,8 @@ limitations under the License. 
#define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1) #include "tensorflow/core/grappler/optimizers/constant_folding.h" -#include "tensorflow/core/grappler/optimizers/layout_optimizer.h" +#include "tensorflow/core/grappler/optimizers/layout_optimizer.h" #include "tensorflow/core/grappler/devices.h" -//#include "tensorflow/core/grappler/clusters/single_machine.h" #include "tensorflow/core/grappler/clusters/virtual_cluster.h" #include "tensorflow/core/protobuf/device_properties.pb.h" #include "tensorflow/core/grappler/grappler_item.h" @@ -121,73 +120,146 @@ std::unordered_map> BuildTensorNameMap( return result; } -tensorflow::Status ConvertSubGraphToTensorRT( - tensorflow::Graph& graph, const std::vector& output_names, - const std::set& subgraph_node_ids, size_t max_batch_size, - size_t max_workspace_size, - const tensorflow::grappler::GraphProperties& graph_properties) { - tensorflow::EdgeSet subgraph_incoming_edges; - GetSubGraphIncomingEdges(graph, subgraph_node_ids, &subgraph_incoming_edges); +struct ConvertGraphParams{ + ConvertGraphParams(tensorflow::Graph &graph_, + const std::vector &output_names_, + const std::set& subgraph_node_ids_, + size_t max_batch_size_, + size_t max_workspace_size_, + const tensorflow::grappler::GraphProperties &graph_properties_, + bool int8_ + ):graph(graph_),output_names(output_names_),subgraph_node_ids(subgraph_node_ids_), + max_batch_size(max_batch_size_),max_workspace_size(max_workspace_size_), + graph_properties(graph_properties_),int8(int8_){ + + } - std::vector> subgraph_inputs; + tensorflow::Graph& graph; + const std::vector& output_names; + const std::set& subgraph_node_ids; + size_t max_batch_size; + size_t max_workspace_size; + const tensorflow::grappler::GraphProperties& graph_properties; + bool int8; + std::vector> subgraph_inputs; + std::vector> subgraph_outputs; + tensorflow::EdgeSet subgraph_incoming_edges; + tensorflow::EdgeSet subgraph_outgoing_edges; +}; +tensorflow::Status 
FillSubGraphEdgeSets(ConvertGraphParams &p){ - // Collect inputs by looking for incoming edges - for (tensorflow::Edge const* edge : subgraph_incoming_edges) { - subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); + GetSubGraphIncomingEdges(p.graph, p.subgraph_node_ids, &p.subgraph_incoming_edges); + for (tensorflow::Edge const* edge : p.subgraph_incoming_edges) { + p.subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); } + auto output_name_to_index_map = BuildTensorNameMap(p.output_names); std::set> subgraph_outputs_set; - // Collect outputs referenced from output_names - auto output_name_to_index_map = BuildTensorNameMap(output_names); - // for (int node_id : subgraph_node_ids_no_placeholder) { - for (int node_id : subgraph_node_ids) { - tensorflow::Node* node = graph.FindNodeId(node_id); + + for (int node_id : p.subgraph_node_ids) { + tensorflow::Node* node = p.graph.FindNodeId(node_id); if (output_name_to_index_map.count(node->name())) { for (int index : output_name_to_index_map.at(node->name())) { subgraph_outputs_set.insert({node_id, index}); } } } - // Collect outputs referenced from outgoing edges - tensorflow::EdgeSet subgraph_outgoing_edges; - // GetSubGraphOutgoingEdges(graph, subgraph_node_ids_no_placeholder, - // &subgraph_outgoing_edges); - GetSubGraphOutgoingEdges(graph, subgraph_node_ids, &subgraph_outgoing_edges); - for (tensorflow::Edge const* edge : subgraph_outgoing_edges) { + + GetSubGraphOutgoingEdges(p.graph, p.subgraph_node_ids, &p.subgraph_outgoing_edges); + for (tensorflow::Edge const* edge : p.subgraph_outgoing_edges) { subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); } - // Impose an ordering on the outputs - std::vector> subgraph_outputs( + p.subgraph_outputs.reserve(subgraph_outputs_set.size()); + p.subgraph_outputs.insert(p.subgraph_outputs.begin(), subgraph_outputs_set.begin(), subgraph_outputs_set.end()); - // Build TensorRT node and add it to the graph + return 
tensorflow::Status::OK(); + +}; + +tensorflow::Status GetCalibNode(ConvertGraphParams *params){ + + FillSubGraphEdgeSets(*params); tensorflow::NodeDef trt_node_def; - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef( - graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs, - max_batch_size, max_workspace_size, graph_properties, &trt_node_def)); + + SubGraphParams s(params->graph, params->subgraph_node_ids, params->subgraph_inputs, params->subgraph_outputs, + params->max_batch_size, params->max_workspace_size, params->graph_properties, &trt_node_def); + TF_RETURN_IF_ERROR(InjectCalibrationNode(s)); tensorflow::Status status; - tensorflow::Node* trt_node = graph.AddNode(trt_node_def, &status); + tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); + + TF_RETURN_IF_ERROR(status); + + for (auto inp_port: params->subgraph_inputs) { // loop over incoming edges and attach them to calib node + tensorflow::Node * in_node =params->graph.FindNodeId(inp_port.first); + params->graph.UpdateEdge(trt_node, inp_port.second, in_node, inp_port.second); + } + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params ) { + +// tensorflow::EdgeSet subgraph_incoming_edges; +// +// std::vector> subgraph_inputs; +// +// +// // Collect inputs by looking for incoming edges +// for (tensorflow::Edge const* edge : subgraph_incoming_edges) { +// subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); +// } +// std::set> subgraph_outputs_set; +// // Collect outputs referenced from output_names +// auto output_name_to_index_map = BuildTensorNameMap(output_names); +// for (int node_id : subgraph_node_ids) { +// tensorflow::Node* node = graph.FindNodeId(node_id); +// if (output_name_to_index_map.count(node->name())) { +// for (int index : output_name_to_index_map.at(node->name())) { +// subgraph_outputs_set.insert({node_id, index}); +// } +// } +// } +// // Collect outputs referenced from outgoing edges 
+// tensorflow::EdgeSet subgraph_outgoing_edges; +// // GetSubGraphOutgoingEdges(graph, subgraph_node_ids_no_placeholder, +// // &subgraph_outgoing_edges); +// GetSubGraphOutgoingEdges(graph, subgraph_node_ids, &subgraph_outgoing_edges); +// for (tensorflow::Edge const* edge : subgraph_outgoing_edges) { +// subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); +// } +// // Impose an ordering on the outputs +// std::vector> subgraph_outputs( +// subgraph_outputs_set.begin(), subgraph_outputs_set.end()); +// // Build TensorRT node and add it to the graph + FillSubGraphEdgeSets(*params); + tensorflow::NodeDef trt_node_def; + + SubGraphParams s(params->graph, params->subgraph_node_ids, params->subgraph_inputs, params->subgraph_outputs, + params->max_batch_size, params->max_workspace_size, params->graph_properties, &trt_node_def); + TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(s)); + tensorflow::Status status; + tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); TF_RETURN_IF_ERROR(status); // Re-map outgoing edges to use the new TRT node instead of the orig subgraph std::map, int> subgraph_edge_to_output_map; - for (size_t i = 0; i < subgraph_outputs.size(); ++i) { - subgraph_edge_to_output_map.insert({subgraph_outputs.at(i), i}); + for (size_t i = 0; i < params->subgraph_outputs.size(); ++i) { + subgraph_edge_to_output_map.insert({params->subgraph_outputs.at(i), i}); } TF_RETURN_IF_ERROR(status); - for (tensorflow::Edge const* edge : subgraph_outgoing_edges) { + for (tensorflow::Edge const* edge : params->subgraph_outgoing_edges) { std::pair old_src = {edge->src()->id(), edge->src_output()}; int new_src_output = subgraph_edge_to_output_map.at(old_src); - graph.UpdateEdge(trt_node, new_src_output, edge->dst(), edge->dst_input()); + params->graph.UpdateEdge(trt_node, new_src_output, edge->dst(), edge->dst_input()); } // Remove the original subgraph - for (int node_id : subgraph_node_ids) { - tensorflow::Node* node = 
graph.FindNodeId(node_id); + for (int node_id : params->subgraph_node_ids) { + tensorflow::Node* node = params->graph.FindNodeId(node_id); // Don't remove the input placeholders if (node->type_string() == "Placeholder") { continue; } - graph.RemoveNode(node); + params->graph.RemoveNode(node); } return tensorflow::Status::OK(); } @@ -209,7 +281,9 @@ tensorflow::Status BuildNodeMap( tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, - size_t max_workspace_size, tensorflow::GraphDef* new_graph_def) { + size_t max_workspace_size, + tensorflow::GraphDef* new_graph_def, + bool int8=false) { // optimization pass tensorflow::grappler::GrapplerItem item; @@ -246,9 +320,9 @@ tensorflow::Status ConvertGraphDefToTensorRT( item.graph = gdef; tensorflow::grappler::ConstantFolding fold(nullptr); status = fold.Optimize(nullptr, item, &gdef); - if (status !=tensorflow::Status::OK()) + if (status !=tensorflow::Status::OK()) { return status; - + } // AJ refactoring shape inference through grappler/GraphProperties. 
tensorflow::grappler::GraphProperties static_graph_properties(item); static_graph_properties.InferStatically(false); @@ -296,9 +370,14 @@ tensorflow::Status ConvertGraphDefToTensorRT( for (std::string const& node_name : subgraph_node_names) { subgraph_node_ids.insert(node_map.at(node_name)->id()); } - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT( - graph, output_names, subgraph_node_ids, max_batch_size, - max_workspace_size, static_graph_properties)); + + ConvertGraphParams p(graph,output_names,subgraph_node_ids,max_batch_size,max_workspace_size, + static_graph_properties,int8); + if(int8) { + TF_RETURN_IF_ERROR(GetCalibNode(&p)); + } else{ + TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT(&p)); + } } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index cd713de888..4ac33cf128 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -27,7 +27,8 @@ namespace convert { tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, - size_t max_workspace_size, tensorflow::GraphDef* new_graph_def); + size_t max_workspace_size, + tensorflow::GraphDef* new_graph_def,bool int8); } } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 86c43d960a..d54c88d9f3 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -40,6 +40,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/contrib/tensorrt/resources/TRTResourceManager.h" +#include "tensorflow/contrib/tensorrt/resources/TRTResources.h" #define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1) // Check if the types are equal. Cast to int first so that failure log message @@ -1547,23 +1548,216 @@ void Converter::register_op_converters() { } } // namespace +tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams &s ){ + return tensorflow::errors::Unimplemented("Not implemented yet"); +} + +tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams &s){ + // Visit nodes in reverse topological order and construct the TRT network. + + // Toposort + std::vector order_vec; + tensorflow::GetPostOrder(s.graph, &order_vec); + // Select just the subgraph + std::list order; + for (tensorflow::Node* node : order_vec) { + if (s.subgraph_node_ids.count(node->id())) { + // order.push_back(node); + order.push_front(node); // we want topological order to construct the + // network layer by layer + } + } + // topological order is needed to build TRT network + LOG(DEBUG) << "BUILDING 1"; + static int static_id = 0; + std::string calib_op_name=std::string("my_trt_calib_op_") + std::to_string(static_id++); + + + LOG(DEBUG) << "BUILDING 2"; + auto trt_rmgr=tensorflow::trt::TRTResourceManager::instance(); + auto op_rmgr=trt_rmgr->getManager("TRTCalibOps"); + auto op_res=new tensorflow::trt::TRTCalibrationResource(); + TF_CHECK_OK(op_rmgr->Create(calib_op_name,calib_op_name,op_res)); + op_res->logger=new tensorflow::tensorrt::Logger(); + op_res->builder = nvinfer1::createInferBuilder(*(op_res->logger)); + + if (!op_res->builder) { + return tensorflow::errors::Internal( + "failed to create TensorRT builder object"); + } + + LOG(DEBUG) << "BUILDING 3"; + + op_res->network = op_res->builder->createNetwork(); + if (!op_res->network) { + return 
tensorflow::errors::Internal( + "failed to create TensorRT network object"); + } + + LOG(DEBUG) << "BUILDING 4"; + + // Build the network + Converter converter(op_res->network); + + LOG(DEBUG) << "BUILDING 5"; + std::vector input_names; + std::vector input_dtypes; + for (std::pair const& input : s.input_inds) { + LOG(DEBUG) << "parsing input!!!!!"; + int node_id = input.first; + int output_idx = input.second; + tensorflow::Node* node = s.graph.FindNodeId(node_id); + auto node_name = node->name(); + input_names.push_back(node_name); // insert original node name without port + // TODO(jie): alternative :) + // tensorflow::DataType tf_dtype = node->output_type(output_idx); + if (!s.graph_properties.HasOutputProperties(node_name)) + return tensorflow::errors::Internal("failed to find input node: " + + node_name); + + auto op_info_vec = s.graph_properties.GetOutputProperties(node_name); + if (static_cast(op_info_vec.size()) < output_idx) + return tensorflow::errors::Internal( + "accessing output index of: " + std::to_string(output_idx) + + ", at node: " + node_name + "with output entry from shape_map: " + + std::to_string(op_info_vec.size())); + + auto op_info = op_info_vec.at(output_idx); + + tensorflow::DataType tf_dtype = op_info.dtype(); + input_dtypes.push_back(tf_dtype); + + nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); + TF_CHECK_OK(convert_dtype(tf_dtype, &dtype)); + + LOG(DEBUG) << "accessing output index of: " << std::to_string(output_idx) + << ", at node: " << node_name + << "with output entry from shape_map: " + << std::to_string(op_info_vec.size()); + + // TODO(ben,jie): update TRT input format/dimension + nvinfer1::DimsCHW input_dim_psuedo_chw; + for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; + + for (int i = 1; i < op_info.shape().dim_size(); i++) { + LOG(DEBUG) << "dimension: " << i + << " , size: " << op_info.shape().dim(i).size(); + input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); + } + + // TODO(ben,jie): proper way 
to restore input tensor name? + auto input_tensor_name = node_name; + if (output_idx != 0) + input_tensor_name = node_name + ":" + std::to_string(output_idx); + + nvinfer1::ITensor* input_tensor = converter.network()->addInput( + input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); + + if (!input_tensor) + return tensorflow::errors::InvalidArgument( + "Failed to create Input layer"); + LOG(DEBUG) << "input tensor name :" << input_tensor_name; + + if (!converter.insert_input_tensor(input_tensor_name, input_tensor)) + return tensorflow::errors::AlreadyExists( + "output tensor already exists for op: " + input_tensor_name); + } + + LOG(DEBUG) << "finished sorting"; + + for (const tensorflow::Node* node : order) { + tensorflow::NodeDef const& node_def = node->def(); + LOG(DEBUG) << "converting node: " << node_def.name() << " , " + << node_def.op(); + TF_RETURN_IF_ERROR(converter.convert_node(node_def)); + } + + LOG(DEBUG) << "finished conversion"; + + // Gather output metadata + std::vector output_names; + std::vector output_dtypes; + for (std::pair const& output : s.output_inds) { + int node_id = output.first; + int output_idx = output.second; + tensorflow::Node* node = s.graph.FindNodeId(node_id); + std::string op_name = node->name(); + std::string tensor_name = op_name; + if (output_idx != 0) + tensor_name = tensor_name + ":" + std::to_string(output_idx); + LOG(DEBUG) << "output tensor name: " << tensor_name; + output_names.push_back(tensor_name); + auto tensor_or_weights = converter.get_tensor(tensor_name); + if (!tensor_or_weights.is_tensor()) { + return tensorflow::errors::InvalidArgument( + "Output node is weights not tensor"); + } + nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); + if (!tensor) { + return tensorflow::errors::NotFound("Output tensor not found: " + + tensor_name); + } + converter.network()->markOutput(*tensor); + tensorflow::DataType tf_dtype = node->output_type(output_idx); + output_dtypes.push_back(tf_dtype); + nvinfer1::DataType 
trt_dtype = nvinfer1::DataType::kFLOAT; + TF_RETURN_IF_ERROR(convert_dtype(tf_dtype, &trt_dtype)); + tensor->setType(trt_dtype); + } + + LOG(DEBUG) << "finished output"; + + // Build the engine + op_res->builder->setMaxBatchSize(s.max_batch_size); + op_res->builder->setMaxWorkspaceSize(s.max_workspace_size); + + // Build the TRT op + // TODO(sami,ben,jie): proper naming! + tensorflow::NodeDefBuilder op_builder( + calib_op_name, "TRTCalibOp"); + std::vector income_edges; + for (size_t i = 0; i < input_names.size(); ++i) { + int output_idx = s.input_inds.at(i).second; + // we wired up the input here already, it is redundant to do it again in + // ConvertSubGraphToTensorRT(convert_graph.cc) + auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut(input_names.at(i), + output_idx, input_dtypes.at(i)); + income_edges.push_back(incoming_edge); + } + tensorflow::gtl::ArraySlice + input_list(income_edges); + op_builder.Input(input_list); + std::vector segment_names; + segment_names.reserve(s.subgraph_node_ids.size()); + for(int i : s.subgraph_node_ids){ + auto node=s.graph.FindNodeId(i); + segment_names.push_back(node->name()); + } + LOG(INFO) << "finished op preparation"; + + auto status = op_builder.Attr("segment_names", segment_names ) + .Attr("segment_output_names", output_names) + .Finalize(s.trt_node); + + LOG(INFO) << status.ToString(); + LOG(INFO) << "finished op building"; + + return tensorflow::Status::OK(); + +} tensorflow::Status ConvertSubGraphToTensorRTNodeDef( - const tensorflow::Graph& graph, const std::set& subgraph_node_ids, - const std::vector>& input_inds, - const std::vector>& output_inds, size_t max_batch_size, - size_t max_workspace_size, - const tensorflow::grappler::GraphProperties& graph_properties, - tensorflow::NodeDef* trt_node) { + tensorrt::convert::SubGraphParams &s +) { // Visit nodes in reverse topological order and construct the TRT network. 
// Toposort std::vector order_vec; - tensorflow::GetPostOrder(graph, &order_vec); + tensorflow::GetPostOrder(s.graph, &order_vec); // Select just the subgraph std::list order; for (tensorflow::Node* node : order_vec) { - if (subgraph_node_ids.count(node->id())) { + if (s.subgraph_node_ids.count(node->id())) { // order.push_back(node); order.push_front(node); // we want topological order to contstruct the // network layer by layer @@ -1601,20 +1795,20 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( LOG(DEBUG) << "BUILDING 5"; std::vector input_names; std::vector input_dtypes; - for (std::pair const& input : input_inds) { + for (std::pair const& input : s.input_inds) { LOG(DEBUG) << "parsing input!!!!!"; int node_id = input.first; int output_idx = input.second; - tensorflow::Node* node = graph.FindNodeId(node_id); + tensorflow::Node* node = s.graph.FindNodeId(node_id); auto node_name = node->name(); input_names.push_back(node_name); // insert original node name without port // TODO(jie): alternative :) // tensorflow::DataType tf_dtype = node->output_type(output_idx); - if (!graph_properties.HasOutputProperties(node_name)) + if (!s.graph_properties.HasOutputProperties(node_name)) return tensorflow::errors::Internal("failed to find input node: " + node_name); - auto op_info_vec = graph_properties.GetOutputProperties(node_name); + auto op_info_vec = s.graph_properties.GetOutputProperties(node_name); if (static_cast(op_info_vec.size()) < output_idx) return tensorflow::errors::Internal( "accessing output index of: " + std::to_string(output_idx) + @@ -1676,10 +1870,10 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // Gather output metadata std::vector output_names; std::vector output_dtypes; - for (std::pair const& output : output_inds) { + for (std::pair const& output : s.output_inds) { int node_id = output.first; int output_idx = output.second; - tensorflow::Node* node = graph.FindNodeId(node_id); + tensorflow::Node* node = s.graph.FindNodeId(node_id); 
std::string op_name = node->name(); std::string tensor_name = op_name; if (output_idx != 0) @@ -1707,8 +1901,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( LOG(DEBUG) << "finished output"; // Build the engine - trt_builder->setMaxBatchSize(max_batch_size); - trt_builder->setMaxWorkspaceSize(max_workspace_size); + trt_builder->setMaxBatchSize(s.max_batch_size); + trt_builder->setMaxWorkspaceSize(s.max_workspace_size); LOG(INFO) << "starting build engine"; // TODO(ben,jie): half2 and int8 mode support std::string engine_plan_string; @@ -1736,7 +1930,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( "my_trt_op" + std::to_string(static_id++), "TRTEngineOp"); std::vector income_edges; for (size_t i = 0; i < input_names.size(); ++i) { - int output_idx = input_inds.at(i).second; + int output_idx = s.input_inds.at(i).second; // we wired up the input here already, it is redundant to do it again in // ConvertSubGraphToTensorRT(convert_graph.cc) auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut(input_names.at(i), @@ -1753,7 +1947,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( .Attr("input_nodes", input_names) .Attr("output_nodes", output_names) .Attr("OutT", output_dtypes) - .Finalize(trt_node); + .Finalize(s.trt_node); LOG(INFO) << status.ToString(); LOG(INFO) << "finished op building"; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index dc59c37892..9f552d0990 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -28,15 +28,37 @@ limitations under the License. 
namespace tensorrt { namespace convert { +struct SubGraphParams{ + SubGraphParams(const tensorflow::Graph &graph_, + const std::set &subgraph_node_ids_, + const std::vector> &input_inds_, + const std::vector> &output_inds_, + size_t max_batch_size_, + size_t max_workspace_size_, + const tensorflow::grappler::GraphProperties &graph_properties_, + tensorflow::NodeDef* trt_node_, + bool int8_=false):graph(graph_), subgraph_node_ids(subgraph_node_ids_), + input_inds(input_inds_),output_inds(output_inds_), + max_batch_size(max_batch_size_), + max_workspace_size(max_workspace_size_), + graph_properties(graph_properties_), + trt_node(trt_node_),int8(int8_){} + + const tensorflow::Graph &graph; + const std::set& subgraph_node_ids; + const std::vector>& input_inds; // {node_id, output_idx} + const std::vector>& output_inds; // {node_id, output_idx} + size_t max_batch_size; + size_t max_workspace_size; + const tensorflow::grappler::GraphProperties& graph_properties; + tensorflow::NodeDef* trt_node; + const bool int8; +}; + tensorflow::Status ConvertSubGraphToTensorRTNodeDef( - const tensorflow::Graph& graph, const std::set& subgraph_node_ids, - const std::vector>& - input_inds, // {node_id, output_idx} - const std::vector>& - output_inds, // {node_id, output_idx} - size_t max_batch_size, size_t max_workspace_size, - const tensorflow::grappler::GraphProperties& graph_prop, - tensorflow::NodeDef* trt_node); + SubGraphParams & params + ); +tensorflow::Status InjectCalibrationNode(SubGraphParams ¶ms); } // namespace convert } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc new file mode 100644 index 0000000000..6fdb583b9a --- /dev/null +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -0,0 +1,68 @@ +// +// Created by skama on 1/25/18. 
+// + +#include "tensorflow/contrib/tensorrt/kernels/trt_calib_op.h" +#include +#include +#include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" +#include "tensorflow/contrib/tensorrt/resources/TRTResourceManager.h" +#include "tensorflow/contrib/tensorrt/resources/TRTResources.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" +namespace tensorflow{ +namespace trt{ +TRTCalibOp::TRTCalibOp(OpKernelConstruction* context) : OpKernel(context){ + OP_REQUIRES_OK(context, + context->GetAttr("segment_nodes", &segment_nodes_)); + OP_REQUIRES_OK(context, context->GetAttr("input_names", &input_names_)); + dev_tensors_.resize(segment_nodes_.size()); + +}; + +void TRTCalibOp::Compute(OpKernelContext *ctx) { + auto trt_rm = tensorflow::trt::TRTResourceManager::instance(); + auto resmgr = trt_rm->getManager(name()); + TRTCalibrationResource *calibRes= nullptr; + auto status=resmgr->Lookup(name(), name(), &calibRes); + if (status.ok()){ + int batchSize=ctx->input(0).dim_size(0); + int numInputs=ctx->num_inputs(); + if ( calibRes->calibrator == nullptr){// first run + for(int i = 0 ; i < numInputs; i++){ + const Tensor& t=ctx->input(i); + OP_REQUIRES_OK(ctx, ctx->allocate_persistent(t.dtype(), t.shape(),&dev_tensors_.at(i), nullptr)); + const auto dTensor=dev_tensors_.at(i).AccessTensor(ctx); + CHECK_EQ(t.TotalBytes(),dTensor->TotalBytes()); + auto dType=t.dtype(); + void* devAddr=(void*)dTensor->flat::Type>().data(); + device_buffers_.emplace({input_names_.at(i),std::make_pair(devAddr,dTensor->TotalBytes())}); + } + calibRes->calibrator=new TRTInt8Calibrator(device_buffers_,batchSize); + auto builder=calibRes->builder; + calibRes->thr=new std::thread([calibRes](){ + calibRes->engine=calibRes->builder->buildCudaEngine(*calibRes->network); // will loop until we terminate calibrator + }); + } + std::unordered_map 
input_data; + for(int i = 0; i < numInputs; i++){ + const Tensor& t = ctx->input(i); + auto dType = t.dtype(); + void* data_address = (void*)t.flat::Type>().data(); + const auto dTensor = dev_tensors_.at(i).AccessTensor(ctx); + CHECK_EQ(t.TotalBytes(), dTensor->TotalBytes()); // use the tensor so FW keeps it + input_data.emplace(input_names_.at(i), data_address); + ctx->set_output(i,t); + } + calibRes->calibrator->setBatch(input_data); + }else{ + ctx->SetStatus(status); + return; + } + +}; + +} +} \ No newline at end of file diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h new file mode 100644 index 0000000000..aefafb29d5 --- /dev/null +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h @@ -0,0 +1,35 @@ +// +// Created by skama on 1/25/18. +// + +#ifndef TFGITHUB_TRT_CALIB_OP_H +#define TFGITHUB_TRT_CALIB_OP_H + +#include +#include +#include +#include +#include +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_shape.h" + +namespace tensorflow { +namespace trt { +class TRTCalibOp: public OpKernel { +public: + explicit TRTCalibOp(OpKernelConstruction* context); + + void Compute(OpKernelContext* context) override; + + private: + std::vector segment_nodes_; + std::vector input_names_; + std::vector shapes_; + std::unordered_map> device_buffers_; + std::vector dev_tensors_; + +}; +} +} +#endif //TFGITHUB_TRT_CALIB_OP_H diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index a1524a592a..54b8d0d431 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -24,8 +24,8 @@ limitations under the License. 
namespace tensorflow { static ::tensorflow::tensorrt::Logger gLogger; -using namespace nvinfer1; - +using IRuntime=nvinfer1::IRuntime; +using Dims=nvinfer1::Dims; namespace tensorrt { TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { @@ -44,7 +44,7 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // TODO(samikama) runtime should be taken from a resourcemanager as well. // Only engine should be in the op and context and runtime should be taken // from resourcemanager - IRuntime* infer = createInferRuntime(gLogger); + IRuntime* infer = nvinfer1::createInferRuntime(gLogger); trt_engine_ptr_.reset(infer->deserializeCudaEngine( serialized_engine.c_str(), serialized_engine.size(), nullptr)); diff --git a/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc b/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc new file mode 100644 index 0000000000..ddf2baa526 --- /dev/null +++ b/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc @@ -0,0 +1,34 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +namespace tensorflow { + + +REGISTER_OP("TRTCalibOp") + .Attr("segment_nodes: list(string)") // names of the ops in segment + .Attr("segment_output_names: list(string)") // names of the output ops in segment + .Attr("InT: list({int8, float16, float32})") + .Input("in_tensor: InT") + .Output("out_tensor: InT") + .SetShapeFn([](tensorflow::shape_inference::InferenceContext* c) { + for (int i = 0; i < c->num_inputs(); i++){ + c->set_output(i, c->input(i)); + } + return Status::OK(); + }); + +} // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 354f0c8b42..5aba371a03 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -30,7 +30,7 @@ from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops -def CreateInferenceGraph(input_graph_def, outputs,max_batch_size=1,max_workspace_size=2<<20): +def CreateInferenceGraph(input_graph_def, outputs,max_batch_size=1,max_workspace_size=2<<20, int8=False): """Python wrapper for the TRT transormation. @@ -76,7 +76,7 @@ def CreateInferenceGraph(input_graph_def, outputs,max_batch_size=1,max_workspace # transformed graphs protobuf string. 
out = trt_convert( optimized_graph_def_str ,outputs, - max_batch_size,max_workspace_size) + max_batch_size,max_workspace_size,int8) status = out[0] output_graph_def_string = out[1] del optimized_graph_def_str #save some memory diff --git a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc index 3c94b52ea6..fe414c45ce 100644 --- a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc @@ -9,7 +9,7 @@ namespace tensorflow { namespace trt { - +// set the batch size before constructing the thread to execute engine int TRTInt8Calibrator::getBatchSize() const { return batch_size_; } bool TRTInt8Calibrator::setBatch( diff --git a/tensorflow/contrib/tensorrt/resources/TRTResources.h b/tensorflow/contrib/tensorrt/resources/TRTResources.h index 2b65017943..2fe78b882d 100644 --- a/tensorflow/contrib/tensorrt/resources/TRTResources.h +++ b/tensorflow/contrib/tensorrt/resources/TRTResources.h @@ -6,27 +6,40 @@ #define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ -#include #include -#include "tensorflow/contrib/tensorrt/resourcemgr/TRTInt8Calibrator.h" +#include +#include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" #include "tensorflow/core/framework/resource_mgr.h" namespace tensorflow { namespace trt { struct TRTCalibrationResource : public tensorflow::ResourceBase { - TRTCalibrationResource():calibrator(nullptr), builder(nullptr), thr(nullptr){}; + TRTCalibrationResource() + : calibrator(nullptr), + builder(nullptr), + network(nullptr), + engine(nullptr), + logger(nullptr), + thr(nullptr) {} + string DebugString() override { + return ""; + } TRTInt8Calibrator* calibrator; nvinfer1::IBuilder* builder; - std::thread *thr; + nvinfer1::INetworkDefinition* network; + nvinfer1::ICudaEngine* engine; + tensorflow::tensorrt::Logger* logger; + std::thread* thr; }; 
-struct TRTEngineResource:public tensorflow::ResourceBase{ - TRTEngineResource():runtime(nullptr), ctx(nullptr){}; - nvinfer1::IRuntime *runtime; - nvinfer1::IExecutionContext *ctx; +struct TRTEngineResource : public tensorflow::ResourceBase { + TRTEngineResource() : runtime(nullptr), ctx(nullptr){}; + nvinfer1::IRuntime* runtime; + nvinfer1::IExecutionContext* ctx; }; -} -} -#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCEMGR_TRTRESOURCES_H_ +} // namespace trt +} // namespace tensorflow +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCEMGR_TRTRESOURCES_H_ diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i index 5f8e73a59f..3e8baf91ae 100644 --- a/tensorflow/contrib/tensorrt/trt_conversion.i +++ b/tensorflow/contrib/tensorrt/trt_conversion.i @@ -28,7 +28,8 @@ std::pair trt_convert(string graph_def_string,//const tensorflow::GraphDef& std::vector output_names, size_t max_batch_size, - size_t max_workspace_size + size_t max_workspace_size_bytes, + bool int8 // unfortunately we can't use TF_Status here since it // is in c/c_api and brings in a lot of other libraries // which in turn declare ops. 
These ops are included @@ -57,8 +58,8 @@ tensorrt::convert::ConvertGraphDefToTensorRT(graph_def, output_names, max_batch_size, - max_workspace_size, - &outGraph); + max_workspace_size_bytes, + &outGraph,int8); if (!conversion_status.ok()) { auto retCode=(int)conversion_status.code(); char buff[2000]; @@ -79,6 +80,6 @@ std::pair trt_convert(string graph_def_string, std::vector output_names, size_t max_batch_size, - size_t max_workspace_size); + size_t max_workspace_size,bool int8); %unignoreall -- GitLab From 573c6f40a90ace2bc921738937fea32fdf724f7b Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Mon, 5 Feb 2018 13:36:22 -0800 Subject: [PATCH 0043/2939] Bump the required numpy version in r1.6 --- tensorflow/tools/pip_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 2002786999..fe2c22f2f5 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -36,7 +36,7 @@ REQUIRED_PACKAGES = [ 'astor >= 0.6.0', 'gast >= 0.2.0', 'grpcio >= 1.8.6', - 'numpy >= 1.12.1', + 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', 'tensorflow-tensorboard >= 1.5.0, < 1.6.0', -- GitLab From adaabc11680fa2823d029cf67214b23fa6652a4b Mon Sep 17 00:00:00 2001 From: Jie Date: Mon, 5 Feb 2018 18:56:48 -0800 Subject: [PATCH 0044/2939] [DEBUG] multiple GPU crash with [cuda_illigal_memory_address] added cudaSetDevice before ICudaEngine::createExecutionContext() To make sure TRT engine gets allocated on the same GPU (to access IO memory) --- .../contrib/tensorrt/kernels/trt_engine_op.cc | 26 ++++++++++++++++--- .../contrib/tensorrt/segment/segment.cc | 10 ------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index 445900f08c..81fd4c9747 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ 
b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -44,11 +44,22 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // TODO(samikama) runtime should be taken from a resourcemanager as well. // Only engine should be in the op and context and runtime should be taken // from resourcemanager + // TODO(jie): cudaSetDevice make sure trt engine is allocated on the same + // gpu where the input/output is also located. + int gpu_id = context->device()->tensorflow_gpu_device_info()->gpu_id; + cudaSetDevice(gpu_id); + int device; + cudaGetDevice(&device); + if (gpu_id != device) + LOG(FATAL) << "set device failed!"; + IRuntime* infer = createInferRuntime(gLogger); trt_engine_ptr_.reset(infer->deserializeCudaEngine( serialized_engine.c_str(), serialized_engine.size(), nullptr)); trt_context_ptr_.reset(trt_engine_ptr_->createExecutionContext()); + + // trt_context_ptr_.reset(nullptr); // runtime is safe to delete after engine creation infer->destroy(); std::stringstream oss; @@ -103,12 +114,16 @@ void TRTEngineOp::Compute(OpKernelContext* context) { const TensorShape& input_shape = input_tensor.shape(); if (i == 0) { nbBatch = input_shape.dim_size(0); + if (nbBatch > trt_engine_ptr_->getMaxBatchSize()) + LOG(FATAL) << "input tensor batch larger than max_batch_size: " + << trt_engine_ptr_->getMaxBatchSize(); } else if (nbBatch != input_shape.dim_size(0)) { valid = false; break; } // int64 input_shape.dim_size(int d) // int input_shape.dims() + LOG(INFO) << "INPUT BINDING index: " << bindingIndex << " with name: " << input_nodes_[i]; switch (trt_engine_ptr_->getBindingDataType(bindingIndex)) { case nvinfer1::DataType::kFLOAT: LOG(INFO) << "float"; @@ -125,7 +140,7 @@ void TRTEngineOp::Compute(OpKernelContext* context) { } } - if (!valid) LOG(WARNING) << "input data inconsistent batch size"; + if (!valid) LOG(FATAL) << "input data inconsistent batch size"; for (int i = 0; i < static_cast(output_nodes_.size()); i++) { // This is bad that we have 
to reallocate output buffer every run. @@ -135,7 +150,7 @@ void TRTEngineOp::Compute(OpKernelContext* context) { TensorShape output_shape; if (bindingIndex != -1) { - LOG(INFO) << "got binding " << bindingIndex; + LOG(INFO) << "got binding " << bindingIndex << " with name: " << output_nodes_[i]; auto dims = trt_engine_ptr_->getBindingDimensions(bindingIndex); std::vector trt_shape(dims.nbDims + 1); trt_shape[0] = nbBatch; @@ -167,6 +182,7 @@ void TRTEngineOp::Compute(OpKernelContext* context) { break; } } + LOG(INFO) << "getting stream"; // copied from cuda_kernel_helper since it seems only valid in *.cu.cc files const cudaStream_t* stream = CHECK_NOTNULL( reinterpret_cast(context->op_device_context() @@ -174,9 +190,11 @@ void TRTEngineOp::Compute(OpKernelContext* context) { ->implementation() ->CudaStreamMemberHack())); - trt_context_ptr_->enqueue(nbBatch, &buffers[0], *stream, nullptr); + // TODO(jie): trt enqueue does not return error + LOG(INFO) << "enqueue returns: " << trt_context_ptr_->enqueue(nbBatch, &buffers[0], *stream, nullptr); + LOG(INFO) << "all good"; // sync should be done by TF. 
- //cudaStreamSynchronize(*stream); + // cudaStreamSynchronize(*stream); } REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp); diff --git a/tensorflow/contrib/tensorrt/segment/segment.cc b/tensorflow/contrib/tensorrt/segment/segment.cc index 41da528247..d749d0d0e8 100644 --- a/tensorflow/contrib/tensorrt/segment/segment.cc +++ b/tensorflow/contrib/tensorrt/segment/segment.cc @@ -220,16 +220,6 @@ tensorflow::Status SegmentGraph( } } - // Cleanup the graph to remove disconnected nodes before outputting - if (VLOG_IS_ON(2)) { - for (tensorflow::Node* node : graph.nodes()) { - if ((node->in_edges().size() == 0) && (node->out_edges().size() == 0)) { - graph.RemoveNode(node); - } - } - // tensorflow::DumpGraph("Post-Segment", &graph); - } - // Convert the segments into the expected return format for (const auto& itr : sg_map) { const auto& segment_node_names = itr.second; -- GitLab From e2a0db74cfa4ed73692ec5d0af944660bb4b688c Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Tue, 6 Feb 2018 17:52:07 -0800 Subject: [PATCH 0045/2939] Python3 support of docs generation --- tensorflow/docs_src/community/documentation.md | 18 +++--------------- tensorflow/tools/docs/BUILD | 2 +- tensorflow/tools/docs/build_docs_test.py | 4 ---- tensorflow/tools/docs/generate_lib.py | 2 -- tensorflow/tools/docs/generate_lib_test.py | 3 --- tensorflow/tools/docs/parser.py | 4 ++-- tensorflow/tools/docs/parser_test.py | 4 ---- tensorflow/tools/docs/pretty_docs.py | 12 ++++++------ tensorflow/workspace.bzl | 11 ----------- 9 files changed, 12 insertions(+), 48 deletions(-) diff --git a/tensorflow/docs_src/community/documentation.md b/tensorflow/docs_src/community/documentation.md index 003e0a25ec..8d55148e48 100644 --- a/tensorflow/docs_src/community/documentation.md +++ b/tensorflow/docs_src/community/documentation.md @@ -148,19 +148,7 @@ viewing. Do not include url parameters in the source code URL. 
Before building the documentation, you must first set up your environment by doing the following: -1. If pip isn't installed on your machine, install it now by issuing the -following command: - - $ sudo easy_install pip - -2. Use pip to install codegen, mock, and pandas by issuing the following - command (Note: If you are using - a [virtualenv](https://virtualenv.pypa.io/en/stable/) to manage your - dependencies, you may not want to use sudo for these installations): - - $ sudo pip install codegen mock pandas - -3. If bazel is not installed on your machine, install it now. If you are on +1. If bazel is not installed on your machine, install it now. If you are on Linux, install bazel by issuing the following command: $ sudo apt-get install bazel # Linux @@ -168,10 +156,10 @@ following command: If you are on Mac OS, find bazel installation instructions on [this page](https://bazel.build/versions/master/docs/install.html#mac-os-x). -4. Change directory to the top-level `tensorflow` directory of the TensorFlow +2. Change directory to the top-level `tensorflow` directory of the TensorFlow source code. -5. Run the `configure` script and answer its prompts appropriately for your +3. Run the `configure` script and answer its prompts appropriately for your system. 
$ ./configure diff --git a/tensorflow/tools/docs/BUILD b/tensorflow/tools/docs/BUILD index 8f10bc9e0c..cafa1f7eb3 100644 --- a/tensorflow/tools/docs/BUILD +++ b/tensorflow/tools/docs/BUILD @@ -37,7 +37,7 @@ py_library( srcs = ["parser.py"], srcs_version = "PY2AND3", visibility = ["//visibility:public"], - deps = ["@com_github_andreif_codegen"], + deps = ["@astor_archive//:astor"], ) py_test( diff --git a/tensorflow/tools/docs/build_docs_test.py b/tensorflow/tools/docs/build_docs_test.py index ae293f6576..2e8f634e7c 100644 --- a/tensorflow/tools/docs/build_docs_test.py +++ b/tensorflow/tools/docs/build_docs_test.py @@ -39,10 +39,6 @@ class Flags(object): class BuildDocsTest(googletest.TestCase): def testBuildDocs(self): - if sys.version_info >= (3, 0): - print('Warning: Doc generation is not supported from python3.') - return - doc_generator = generate_lib.DocGenerator() doc_generator.set_py_modules([('tf', tf), ('tfdbg', tf_debug)]) diff --git a/tensorflow/tools/docs/generate_lib.py b/tensorflow/tools/docs/generate_lib.py index 003f972070..635408d87f 100644 --- a/tensorflow/tools/docs/generate_lib.py +++ b/tensorflow/tools/docs/generate_lib.py @@ -455,8 +455,6 @@ class DocGenerator(object): """Main entry point for generating docs.""" def __init__(self): - if sys.version_info >= (3, 0): - sys.exit('Doc generation is not supported from python3.') self.argument_parser = argparse.ArgumentParser() self._py_modules = None self._private_map = _get_default_private_map() diff --git a/tensorflow/tools/docs/generate_lib_test.py b/tensorflow/tools/docs/generate_lib_test.py index 1ceaf31f1c..ea6d28a02b 100644 --- a/tensorflow/tools/docs/generate_lib_test.py +++ b/tensorflow/tools/docs/generate_lib_test.py @@ -52,9 +52,6 @@ class DummyVisitor(object): class GenerateTest(googletest.TestCase): def test_write(self): - if sys.version_info >= (3, 0): - self.skipTest('Warning: Doc generation is not supported from python3.') - module = sys.modules[__name__] index = { diff --git 
a/tensorflow/tools/docs/parser.py b/tensorflow/tools/docs/parser.py index 3db164c2b5..1798378d55 100644 --- a/tensorflow/tools/docs/parser.py +++ b/tensorflow/tools/docs/parser.py @@ -26,7 +26,7 @@ import os import re import sys -import codegen +import astor import six from google.protobuf.message import Message as ProtoMessage @@ -705,7 +705,7 @@ def _generate_signature(func, reverse_index): if id(default) in reverse_index: default_text = reverse_index[id(default)] elif ast_default is not None: - default_text = codegen.to_source(ast_default) + default_text = astor.to_source(ast_default) if default_text != repr(default): # This may be an internal name. If so, handle the ones we know about. # TODO(wicke): This should be replaced with a lookup in the index. diff --git a/tensorflow/tools/docs/parser_test.py b/tensorflow/tools/docs/parser_test.py index 8a0e9af521..7d2bf9177a 100644 --- a/tensorflow/tools/docs/parser_test.py +++ b/tensorflow/tools/docs/parser_test.py @@ -523,10 +523,6 @@ class TestParseFunctionDetails(googletest.TestCase): class TestGenerateSignature(googletest.TestCase): def test_known_object(self): - if sys.version_info >= (3, 0): - print('Warning: Doc generation is not supported from python3.') - return - known_object = object() reverse_index = {id(known_object): 'location.of.object.in.api'} diff --git a/tensorflow/tools/docs/pretty_docs.py b/tensorflow/tools/docs/pretty_docs.py index 543b5fa6fe..55ab5bdd49 100644 --- a/tensorflow/tools/docs/pretty_docs.py +++ b/tensorflow/tools/docs/pretty_docs.py @@ -101,7 +101,7 @@ def _build_class_page(page_info): link_template = '[`{short_name}`]({url})' parts.append(', '.join( - link_template.format(**base.__dict__) for base in page_info.bases)) + link_template.format(**base._asdict()) for base in page_info.bases)) parts.append('\n\n') @@ -159,7 +159,7 @@ def _build_class_page(page_info): h3 = ('

' '{short_name}' '

\n\n') - parts.append(h3.format(**method_info.__dict__)) + parts.append(h3.format(**method_info._asdict())) if method_info.signature is not None: parts.append(_build_signature(method_info, use_full_name=False)) @@ -217,7 +217,7 @@ def _build_module_page(page_info): template = '[`{short_name}`]({url}) module' for item in page_info.modules: - parts.append(template.format(**item.__dict__)) + parts.append(template.format(**item._asdict())) if item.doc.brief: parts.append(': ' + item.doc.brief) @@ -229,7 +229,7 @@ def _build_module_page(page_info): template = '[`class {short_name}`]({url})' for item in page_info.classes: - parts.append(template.format(**item.__dict__)) + parts.append(template.format(**item._asdict())) if item.doc.brief: parts.append(': ' + item.doc.brief) @@ -241,7 +241,7 @@ def _build_module_page(page_info): template = '[`{short_name}(...)`]({url})' for item in page_info.functions: - parts.append(template.format(**item.__dict__)) + parts.append(template.format(**item._asdict())) if item.doc.brief: parts.append(': ' + item.doc.brief) @@ -254,7 +254,7 @@ def _build_module_page(page_info): parts.append('## Other Members\n\n') for item in page_info.other_members: - parts.append('`{short_name}`\n\n'.format(**item.__dict__)) + parts.append('`{short_name}`\n\n'.format(**item._asdict())) return ''.join(parts) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index eca744a920..4a2274eb1a 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -328,17 +328,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): build_file = str(Label("//third_party:backports_weakref.BUILD")), ) - tf_http_archive( - name = "com_github_andreif_codegen", - urls = [ - "https://mirror.bazel.build/github.com/andreif/codegen/archive/1.0.tar.gz", - "https://github.com/andreif/codegen/archive/1.0.tar.gz", - ], - sha256 = "2dadd04a2802de27e0fe5a19b76538f6da9d39ff244036afa00c1bba754de5ee", - strip_prefix = "codegen-1.0", - build_file = 
str(Label("//third_party:codegen.BUILD")), - ) - filegroup_external( name = "org_python_license", licenses = ["notice"], # Python 2.0 -- GitLab From 4f5d9a88f84e2261808bc986ece951e6e1d10725 Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Tue, 6 Feb 2018 17:55:15 -0800 Subject: [PATCH 0046/2939] remove unused codegen.BUILD --- third_party/codegen.BUILD | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 third_party/codegen.BUILD diff --git a/third_party/codegen.BUILD b/third_party/codegen.BUILD deleted file mode 100644 index df436c8163..0000000000 --- a/third_party/codegen.BUILD +++ /dev/null @@ -1,16 +0,0 @@ -# -*- mode: python; -*- -# -# Description: -# Extension to ast that allow ast -> python code generation. - -package(default_visibility = ["//visibility:public"]) - -licenses(["notice"]) # New BSD - -exports_files(["LICENSE"]) - -py_library( - name = "com_github_andreif_codegen", - srcs = glob(["codegen.py"]), - srcs_version = "PY2AND3", -) -- GitLab From 83f06ec185ee87fc57220f2f63245d81aa3c9311 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 7 Feb 2018 20:15:10 -0800 Subject: [PATCH 0047/2939] Adding the Visual Studio specification for cmake for the 1.6 branch. (#16852) --- tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat index b87e4a9bec..e545146188 100644 --- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat +++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat @@ -37,7 +37,7 @@ SET CMAKE_DIR=%REPO_ROOT%\tensorflow\contrib\cmake SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe" :: Run cmake to create Visual Studio Project files. 
-%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% -Dtensorflow_WIN_CPU_SIMD_OPTIONS=/arch:AVX +%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% -Dtensorflow_WIN_CPU_SIMD_OPTIONS=/arch:AVX -G "Visual Studio 14 2015" :: Run msbuild in the resulting VS project files to build a pip package. %MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj -- GitLab From 963941e7d639b3211a5792b1086128db554a740a Mon Sep 17 00:00:00 2001 From: Nick Felt Date: Fri, 9 Feb 2018 13:15:31 -0800 Subject: [PATCH 0048/2939] Update tensorboard dependency to 1.6.0+ and new name (#16815) * Update tensorboard dependency to 1.6.0+ and new name * Mention tensorboard package name change in RELEASE.md --- RELEASE.md | 4 ++++ tensorflow/tools/pip_package/setup.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/RELEASE.md b/RELEASE.md index 0fad3b5d41..de4a34bb04 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -59,6 +59,10 @@ newcomers. TensorFlow will print a warning if you use XLA:GPU with a known-bad version of CUDA; see e00ba24c4038e7644da417ddc639169b6ea59122. +* The `tensorboard` command or module may appear to be missing after certain + upgrade flows. This is due to pip package conflicts as a result of changing + the TensorBoard package name. 
See the [TensorBoard 1.6.0 release notes]( + https://github.com/tensorflow/tensorboard/releases/tag/1.6.0) for a fix. ## Thanks to our Contributors diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index fe2c22f2f5..0d4fa465ad 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -39,7 +39,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.4.0', - 'tensorflow-tensorboard >= 1.5.0, < 1.6.0', + 'tensorboard >= 1.6.0, < 1.7.0', 'termcolor >= 1.1.0', ] -- GitLab From 61df29fa97cf82f3d1ef129a70bb5fa3ed99fe3a Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Sat, 10 Feb 2018 09:50:40 -0800 Subject: [PATCH 0049/2939] Update version string to 1.6.0-rc1 --- .../contrib/tpu/profiler/pip_package/setup.py | 2 +- tensorflow/core/public/version.h | 2 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +++++++++---------- tensorflow/docs_src/install/install_linux.md | 22 +++++++++---------- tensorflow/docs_src/install/install_mac.md | 10 ++++----- .../docs_src/install/install_sources.md | 14 ++++++------ tensorflow/tools/pip_package/setup.py | 2 +- 9 files changed, 39 insertions(+), 39 deletions(-) diff --git a/tensorflow/contrib/tpu/profiler/pip_package/setup.py b/tensorflow/contrib/tpu/profiler/pip_package/setup.py index cb61984799..52984cd6fd 100644 --- a/tensorflow/contrib/tpu/profiler/pip_package/setup.py +++ b/tensorflow/contrib/tpu/profiler/pip_package/setup.py @@ -20,7 +20,7 @@ from __future__ import print_function from setuptools import setup -_VERSION = '1.6.0-rc0' +_VERSION = '1.6.0-rc1' CONSOLE_SCRIPTS = [ 'capture_tpu_profile=cloud_tpu_profiler.main:run_main', diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 50bfa91267..7405e01e14 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 
+24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc0" +#define TF_VERSION_SUFFIX "-rc1" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index a783205b4a..f3620cf687 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 5249e04615..4bf4bacaec 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 0c6c773e62..1905f9729e 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's 
`pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.6.0-rc0 + 1.6.0-rc1 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.6.0-rc0 + 1.6.0-rc1 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.6.0-rc0 + 1.6.0-rc1 org.tensorflow libtensorflow_jni_gpu - 1.6.0-rc0 + 1.6.0-rc1 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0-rc0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0-rc1.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0-rc0.zip). 
+ [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0-rc1.zip). 3. Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.6.0-rc0.jar HelloTF.java
+
javac -cp libtensorflow-1.6.0-rc1.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.6.0-rc0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.6.0-rc1.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.6.0-rc0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.6.0-rc1.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 105b225177..62bd45650a 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -188,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -293,7 +293,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl @@ -648,14 +648,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -667,14 +667,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -686,14 +686,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp35-cp35m-linux_x86_64.whl
 
@@ -705,14 +705,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index a6ea548cfb..e3832a7a2a 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -115,7 +115,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -238,7 +238,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -347,7 +347,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl @@ -520,7 +520,7 @@ This section documents the relevant values for Mac OS installations.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl
 
@@ -528,5 +528,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 36dffd85dc..051da692d3 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -359,10 +359,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.6.0rc0 on Linux: +for TensorFlow 1.6.0rc1 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0rc0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0rc1-py2-none-any.whl
 
## Validate your installation @@ -460,8 +460,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux**
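<table>
<tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
<tr><td>tensorflow-1.6.0rc0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow_gpu-1.6.0rc0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>7</td><td>9</td></tr>
<tr><td>tensorflow-1.5.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow_gpu-1.5.0</td><td>GPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>7</td><td>9</td></tr>
<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>3.5-3.6</td><td>MSVC 2015 update 3</td><td>Cmake v3.6.3</td><td>N/A</td><td>N/A</td></tr>
</table>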
- - + + @@ -479,7 +479,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
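<table>
<tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
<tr><td>tensorflow-1.6.0rc0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.9.0</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow_gpu-1.6.0rc0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.9.0</td><td>7</td><td>9</td></tr>
<tr><td>tensorflow-1.6.0rc1</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.9.0</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow_gpu-1.6.0rc1</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.9.0</td><td>7</td><td>9</td></tr>
<tr><td>tensorflow-1.5.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.8.0</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow_gpu-1.5.0</td><td>GPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.8.0</td><td>7</td><td>9</td></tr>
<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>GCC 4.8</td><td>Bazel 0.5.4</td><td>N/A</td><td>N/A</td></tr>
</table>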
- + @@ -493,8 +493,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
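<table>
<tr><th>Version:</th><th>CPU/GPU:</th><th>Python Version:</th><th>Compiler:</th><th>Build Tools:</th><th>cuDNN:</th><th>CUDA:</th></tr>
<tr><td>tensorflow-1.6.0rc0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.8.1</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow-1.6.0rc1</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.8.1</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow-1.5.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.8.1</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow-1.4.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.5.4</td><td>N/A</td><td>N/A</td></tr>
<tr><td>tensorflow-1.3.0</td><td>CPU</td><td>2.7, 3.3-3.6</td><td>Clang from xcode</td><td>Bazel 0.4.5</td><td>N/A</td><td>N/A</td></tr>
</table>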
- - + + diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 0d4fa465ad..a835275dae 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.6.0-rc0' +_VERSION = '1.6.0-rc1' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From 1eca578242eda2db93cdb2509413996e9294751e Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 5 Feb 2018 13:43:52 -0800 Subject: [PATCH 0050/2939] [tf.data] Fix use-after-free bug when closing down an input pipeline. This fix affects the distributed runtime; DirectSession use is unaffected. Before this change, an iterator that used a background prefetching thread might attempt to use a captured FunctionLibraryRuntime from a subgraph that had been deregistered (and hence its FunctionLibraryRuntime would have been deleted). This change introduces a mechanism for "cloning" the necessary parts of the FunctionLibraryRuntime so that it can be owned by the IteratorResource. 
PiperOrigin-RevId: 184579490 --- tensorflow/core/common_runtime/function.cc | 19 ++++++++++++ .../core/common_runtime/graph_optimizer.h | 2 ++ .../process_function_library_runtime.cc | 12 ++++++++ .../process_function_library_runtime.h | 6 ++++ tensorflow/core/framework/function.h | 5 ++++ tensorflow/core/kernels/data/iterator_ops.cc | 7 +++-- .../kernel_tests/iterator_ops_cluster_test.py | 30 +++++++++++++++++++ 7 files changed, 79 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index 150fb85c70..248ff9051b 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -183,6 +183,10 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime { string DebugString(Handle h) override; + Status Clone(std::unique_ptr* out_lib_def, + std::unique_ptr* out_pflr, + FunctionLibraryRuntime** out_flr) override; + private: typedef FunctionLibraryRuntimeImpl ME; @@ -895,6 +899,21 @@ string FunctionLibraryRuntimeImpl::DebugString(Handle handle) { } } +Status FunctionLibraryRuntimeImpl::Clone( + std::unique_ptr* out_lib_def, + std::unique_ptr* out_pflr, + FunctionLibraryRuntime** out_flr) { + TF_RETURN_IF_ERROR( + parent_->Clone(env_, graph_def_version_, optimizer_.options(), + custom_kernel_creator_, out_lib_def, out_pflr)); + *out_flr = (*out_pflr)->GetFLR(device_->name()); + if (out_flr != nullptr) { + return Status::OK(); + } else { + return errors::Internal("Cloning FunctionLibraryRuntime failed."); + } +} + namespace { struct CustomCreatorSingleton { diff --git a/tensorflow/core/common_runtime/graph_optimizer.h b/tensorflow/core/common_runtime/graph_optimizer.h index 8477cea126..80246281cd 100644 --- a/tensorflow/core/common_runtime/graph_optimizer.h +++ b/tensorflow/core/common_runtime/graph_optimizer.h @@ -52,6 +52,8 @@ class GraphOptimizer { shape_map, const std::function& cse_consider_fn = nullptr); + const OptimizerOptions& options() 
{ return opts_; } + private: OptimizerOptions opts_; diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc index dd4bf6a345..41e1ce8c15 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc @@ -350,4 +350,16 @@ void ProcessFunctionLibraryRuntime::Run( done(errors::Internal("Could not find device")); } +Status ProcessFunctionLibraryRuntime::Clone( + Env* env, int graph_def_version, const OptimizerOptions& optimizer_options, + CustomKernelCreator custom_kernel_creator, + std::unique_ptr* out_lib_def, + std::unique_ptr* out_pflr) { + out_lib_def->reset(new FunctionLibraryDefinition(*lib_def_)); + out_pflr->reset(new ProcessFunctionLibraryRuntime( + device_mgr_, env, graph_def_version, out_lib_def->get(), + optimizer_options, std::move(custom_kernel_creator), parent_)); + return Status::OK(); +} + } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.h b/tensorflow/core/common_runtime/process_function_library_runtime.h index 9c9c92f1ea..4296f9449f 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.h +++ b/tensorflow/core/common_runtime/process_function_library_runtime.h @@ -145,6 +145,12 @@ class ProcessFunctionLibraryRuntime { // Removes handle from the state owned by this object. 
Status RemoveHandle(FunctionLibraryRuntime::Handle handle); + Status Clone(Env* env, int graph_def_version, + const OptimizerOptions& optimizer_options, + CustomKernelCreator custom_kernel_creator, + std::unique_ptr* out_lib_def, + std::unique_ptr* out_pflr); + friend class FunctionLibraryRuntimeImpl; mutable mutex mu_; diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h index b933ee0b0e..7498cde637 100644 --- a/tensorflow/core/framework/function.h +++ b/tensorflow/core/framework/function.h @@ -35,6 +35,7 @@ namespace tensorflow { class CancellationManager; class GraphDef; class OpKernel; +class ProcessFunctionLibraryRuntime; class ResourceMgr; class Rendezvous; class ScopedStepContainer; @@ -534,6 +535,10 @@ class FunctionLibraryRuntime { virtual int graph_def_version() = 0; typedef uint64 LocalHandle; + + virtual Status Clone(std::unique_ptr* out_lib_def, + std::unique_ptr* out_pflr, + FunctionLibraryRuntime** out_flr) = 0; }; // Returns a canonicalized string for the instantiation of the diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index dd5f4a4554..8a420ac26d 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -459,7 +459,7 @@ class IteratorHandleOp : public OpKernel { { mutex_lock l(mu_); if (resource_ == nullptr) { - FunctionLibraryRuntime* lib = context->function_library(); + FunctionLibraryRuntime* lib; std::unique_ptr device_mgr(nullptr); std::unique_ptr flib_def(nullptr); std::unique_ptr pflr(nullptr); @@ -469,6 +469,9 @@ class IteratorHandleOp : public OpKernel { // is sufficient demand, but it will require a significant refactoring. 
if (!name_.empty()) { lib = CreatePrivateFLR(context, &device_mgr, &flib_def, &pflr); + } else { + OP_REQUIRES_OK(context, context->function_library()->Clone( + &flib_def, &pflr, &lib)); } ResourceMgr* mgr = context->resource_manager(); @@ -538,7 +541,7 @@ class IteratorHandleOp : public OpKernel { // Wrap the existing device in order to see any captured resources // in its resource manager. The existing device will outlive the // IteratorResource, because we are storing the IteratorResource - // in that device's resourc manager. + // in that device's resource manager. Device* wrapped_device = RenamedDevice::NewRenamedDevice( ctx->device()->name(), down_cast(ctx->device()), false /* owns_underlying */, false /* isolate_session_state */); diff --git a/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py b/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py index 2c65c49ebd..25c91b42dc 100644 --- a/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py +++ b/tensorflow/python/data/kernel_tests/iterator_ops_cluster_test.py @@ -17,6 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops @@ -30,6 +32,7 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import functional_ops from tensorflow.python.ops import lookup_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import string_ops from tensorflow.python.platform import test @@ -140,6 +143,33 @@ class IteratorClusterTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testImplicitDisposeParallelMapDataset(self): + # Tests whether a parallel map dataset will be cleaned up correctly when + # the pipeline does not run it until 
exhaustion. + # The pipeline is TensorSliceDataset -> MapDataset(square_3) -> + # RepeatDataset(None) -> PrefetchDataset(100). + worker, _ = test_util.create_local_cluster(1, 1) + + components = (np.arange(1000), + np.array([[1, 2, 3]]) * np.arange(1000)[:, np.newaxis], + np.array(37.0) * np.arange(1000)) + + def _map_fn(x, y, z): + return math_ops.square(x), math_ops.square(y), math_ops.square(z) + + dataset = ( + dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn) + .repeat(None).prefetch(10000)) + + iterator = dataset.make_initializable_iterator() + init_op = iterator.initializer + get_next = iterator.get_next() + + with session.Session(worker[0].target) as sess: + sess.run(init_op) + for _ in range(3): + sess.run(get_next) + if __name__ == "__main__": test.main() -- GitLab From d4ad85b765f5945bf35a256f0dd2e9a5278de3e5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Feb 2018 09:40:53 -0800 Subject: [PATCH 0051/2939] Fix bug in and speed up ConstantFolding::CreateNodeDef(): * Fix bug trying to store more than kintmax32 values in a repeated proto field. * Speed up populating compressed format. Example: tensorflow/python/kernel_tests/large_concat_op_test with size = 2**29+6 goes from ~30 seconds to ~15 seconds. The fraction of time spent in ConstantFolding::CreateNodeDef() goes down from about 35% to about 12%. PiperOrigin-RevId: 184693749 --- .../grappler/optimizers/constant_folding.cc | 34 +++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 0aeff6222c..2caefdf4bd 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -808,20 +808,26 @@ NodeDef ConstantFolding::CreateNodeDef(const string& name, // Use the packed representation whenever possible to avoid generating large // graphdefs. 
Moreover, avoid repeating the last values if they're equal. if (tensor->NumElements() > 4) { -#define POPULATE_TENSOR_PROTO(tensor, t, TYPE, NAME) \ - optimized = true; \ - TYPE last = tensor->flat()(0); \ - int last_index = 0; \ - for (int i = 0; i < tensor->NumElements(); ++i) { \ - TYPE cur = tensor->flat()(i); \ - t->add_##NAME##_val(cur); \ - if (cur != last) { \ - last = cur; \ - last_index = i; \ - } \ - } \ - /* Remove all identical trailing values to save memory. */ \ - t->mutable_##NAME##_val()->Truncate(last_index + 1); +#define POPULATE_TENSOR_PROTO(tensor, t, TYPE, NAME) \ + const TYPE* val_ptr = tensor->flat().data(); \ + TYPE last = *val_ptr; \ + int64 last_index = 0; \ + for (int64 i = 0; i < tensor->NumElements(); ++i) { \ + TYPE cur = *val_ptr++; \ + if (cur != last) { \ + last = cur; \ + last_index = i; \ + } \ + } \ + if (last_index < kint32max) { \ + optimized = true; \ + t->mutable_##NAME##_val()->Reserve(last_index + 1); \ + t->mutable_##NAME##_val()->AddNAlreadyReserved(last_index + 1); \ + val_ptr = tensor->flat().data(); \ + for (int64 i = 0; i <= last_index; ++i) { \ + t->set_##NAME##_val(i, *val_ptr++); \ + } \ + } if (tensor->dtype() == DT_FLOAT) { POPULATE_TENSOR_PROTO(tensor, t, float, float) -- GitLab From c829c71afb14cd41079231b3c3f33fad5e119679 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 6 Feb 2018 17:37:02 -0800 Subject: [PATCH 0052/2939] [tf.data] Fix a memory leak when an iterator is reinitialized many times in a session. Previously, we would instantiate a new function handle for each function in a dataset each time an iterator on that dataset was initialized. These would only be deleted at session closure, which could lead to an apparent leak of memory over the lifetime of session. 
PiperOrigin-RevId: 184768730 --- .../core/kernels/data/captured_function.cc | 6 +++++- tensorflow/core/kernels/data/iterator_ops.cc | 19 ++++++++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/data/captured_function.cc b/tensorflow/core/kernels/data/captured_function.cc index f3e4f1cd3f..f248f7897f 100644 --- a/tensorflow/core/kernels/data/captured_function.cc +++ b/tensorflow/core/kernels/data/captured_function.cc @@ -32,7 +32,11 @@ Status CapturedFunction::Create( return Status::OK(); } -CapturedFunction::~CapturedFunction() {} +CapturedFunction::~CapturedFunction() { + if (lib_ != nullptr) { + lib_->ReleaseHandle(f_handle_).IgnoreError(); + } +} namespace { class CallFrameBase : public CallFrameInterface { diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index 8a420ac26d..fc3e291afb 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -725,18 +725,23 @@ class OneShotIteratorOp : public AsyncOpKernel { Status TryInit(OpKernelContext* ctx, IteratorResource** iterator, ContainerInfo* cinfo) { TF_RETURN_IF_ERROR(cinfo->Init(ctx->resource_manager(), def())); - FunctionLibraryRuntime* lib = ctx->function_library(); + + FunctionLibraryRuntime* lib; + std::unique_ptr flib_def(nullptr); + std::unique_ptr pflr(nullptr); + TF_RETURN_IF_ERROR(ctx->function_library()->Clone(&flib_def, &pflr, &lib)); // Create an IteratorResource that will hold the iterator for this op. 
TF_RETURN_IF_ERROR( ctx->resource_manager()->LookupOrCreate( cinfo->container(), cinfo->name(), iterator, - [lib, this](IteratorResource** ret) EXCLUSIVE_LOCKS_REQUIRED(mu_) { - *ret = new IteratorResource(output_dtypes_, output_shapes_, - graph_def_version_, nullptr, nullptr, - nullptr, lib); - return Status::OK(); - })); + [lib, this, &flib_def, &pflr](IteratorResource** ret) + EXCLUSIVE_LOCKS_REQUIRED(mu_) { + *ret = new IteratorResource( + output_dtypes_, output_shapes_, graph_def_version_, + nullptr, std::move(flib_def), std::move(pflr), lib); + return Status::OK(); + })); core::ScopedUnref unref_iterator(*iterator); -- GitLab From 0851b150f3fed192db0f51e0a794d2a667b1bc66 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Tue, 6 Feb 2018 20:40:00 -0800 Subject: [PATCH 0053/2939] TPUEstimator: Revert the global_step change and require the user to explicitly pass it. PiperOrigin-RevId: 184784330 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 52 ++++--------------- 1 file changed, 11 insertions(+), 41 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 56793f11d9..2aec7ce707 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -485,7 +485,7 @@ class TPUEstimatorSpec( if self.eval_metrics is not None: host_calls['eval_metrics'] = self.eval_metrics if self.host_call is not None: - host_calls['host_call'] = wrap_hostcall_with_global_step(self.host_call) + host_calls['host_call'] = self.host_call host_call_ret = _OutfeedHostCall.create_cpu_hostcall(host_calls) eval_metric_ops = None if self.eval_metrics is not None: @@ -1303,19 +1303,21 @@ class _ModelFnWrapper(object): self._call_model_fn(features, labels)) loss, train_op = estimator_spec.loss, estimator_spec.train_op - host_call_outfeed_ops = [] if isinstance(estimator_spec, TPUEstimatorSpec): captured_scaffold_fn.capture(estimator_spec.scaffold_fn) - if 
estimator_spec.host_call is not None: - host_call.record({ - 'host_call': wrap_hostcall_with_global_step( - estimator_spec.host_call)}) - host_call_outfeed_ops = host_call.create_enqueue_op() else: captured_scaffold_fn.capture(None) - with ops.control_dependencies([train_op] + host_call_outfeed_ops): - return array_ops.identity(loss) + # We must run train_op to update the variables prior to running the + # outfeed. + with ops.control_dependencies([train_op]): + host_call_outfeed_ops = [] + if (isinstance(estimator_spec, TPUEstimatorSpec) and + estimator_spec.host_call is not None): + host_call.record({'host_call': estimator_spec.host_call}) + host_call_outfeed_ops = host_call.create_enqueue_op() + with ops.control_dependencies(host_call_outfeed_ops): + return array_ops.identity(loss) return train_step, host_call, captured_scaffold_fn @@ -1657,38 +1659,6 @@ class _OutfeedHostCall(object): return ret -def wrap_hostcall_with_global_step(hostcall): - """Wrap the hostcall so that we update the global step upon every call.""" - if hostcall is None: - return None - host_fn, tensors = hostcall - - def global_step_host_fn(_global_step, *args, **kwargs): # pylint: disable=invalid-name - # Note that we don't have any ordering here, so the graph may see a - # global_step that's off by 1. - state_ops.assign( - training.get_global_step(), - math_ops.cast(_global_step[0], dtypes.int64)) - return host_fn(*args, **kwargs) - # Give the global step tensor a batch dimension. Reshape is not supported for - # int64, so we cast it to int32. - # TODO(jhseu): Remove the cast once int64 is supported. 
- global_step_tensor = array_ops.reshape( - math_ops.cast(training.get_global_step(), dtypes.int32), [1]) - if isinstance(tensors, dict): - outfeed_tensors = {'_global_step': global_step_tensor} - outfeed_tensors.update(tensors) - return global_step_host_fn, outfeed_tensors - else: - fn_args = util.fn_args(host_fn) - if len(tensors) != len(fn_args): - raise RuntimeError( - 'In TPUEstimatorSpec.host_call, length of tensors {} does not match ' - 'method args of the function, which takes {}.'.format( - len(tensors), len(fn_args))) - return global_step_host_fn, [global_step_tensor] + list(tensors) - - class _OutfeedHostCallHook(session_run_hook.SessionRunHook): """Hook to run host calls when use_tpu=False.""" -- GitLab From 7c799e11b7b765a8d43e409b32d24dfbb5614bf8 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Wed, 7 Feb 2018 20:50:09 -0800 Subject: [PATCH 0054/2939] Move TPU doc to "using_tpu". Add short titles to some docs. PiperOrigin-RevId: 184941101 --- tensorflow/docs_src/install/leftnav_files | 14 +++++++------- .../docs_src/programmers_guide/debugger.md | 2 +- .../docs_src/programmers_guide/leftnav_files | 9 ++++++--- .../using_tpu.md} | 0 tensorflow/docs_src/tutorials/leftnav_files | 16 ++++++++-------- 5 files changed, 22 insertions(+), 19 deletions(-) rename tensorflow/docs_src/{api_guides/python/TPUEstimator.md => programmers_guide/using_tpu.md} (100%) diff --git a/tensorflow/docs_src/install/leftnav_files b/tensorflow/docs_src/install/leftnav_files index 0e8b5ae7a1..e523e06f67 100644 --- a/tensorflow/docs_src/install/leftnav_files +++ b/tensorflow/docs_src/install/leftnav_files @@ -1,16 +1,16 @@ index.md ### Python -install_linux.md -install_mac.md -install_windows.md -install_sources.md +install_linux.md: Ubuntu +install_mac.md: MacOS +install_windows.md: Windows +install_sources.md: From source >>> migration.md ### Other Languages -install_java.md -install_go.md -install_c.md +install_java.md: Java +install_go.md: Go +install_c.md: C diff --git 
a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index 9eaee27028..dbc4517087 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -1,4 +1,4 @@ -# Debugging TensorFlow Programs +# TensorFlow Debugger diff --git a/tensorflow/docs_src/programmers_guide/leftnav_files b/tensorflow/docs_src/programmers_guide/leftnav_files index 38de3ccc3e..3fe4cb2dda 100644 --- a/tensorflow/docs_src/programmers_guide/leftnav_files +++ b/tensorflow/docs_src/programmers_guide/leftnav_files @@ -10,7 +10,10 @@ tensors.md variables.md graphs.md saved_model.md + +### Accelerators using_gpu.md +using_tpu.md ### ML Concepts embedding.md @@ -19,9 +22,9 @@ embedding.md debugger.md ### TensorBoard -summaries_and_tensorboard.md -graph_viz.md -tensorboard_histograms.md +summaries_and_tensorboard.md: Visualizing Learning +graph_viz.md: Graphs +tensorboard_histograms.md: Histograms ### Misc version_compat.md diff --git a/tensorflow/docs_src/api_guides/python/TPUEstimator.md b/tensorflow/docs_src/programmers_guide/using_tpu.md similarity index 100% rename from tensorflow/docs_src/api_guides/python/TPUEstimator.md rename to tensorflow/docs_src/programmers_guide/using_tpu.md diff --git a/tensorflow/docs_src/tutorials/leftnav_files b/tensorflow/docs_src/tutorials/leftnav_files index 41ffdc8601..888052428f 100644 --- a/tensorflow/docs_src/tutorials/leftnav_files +++ b/tensorflow/docs_src/tutorials/leftnav_files @@ -1,22 +1,22 @@ index.md ### Images -layers.md -image_recognition.md -image_retraining.md +layers.md: MNIST +image_recognition.md: Image Recognition +image_retraining.md: Image Retraining deep_cnn.md ### Sequences recurrent.md -seq2seq.md -recurrent_quickdraw.md +seq2seq.md: Neural Machine Translation +recurrent_quickdraw.md: Drawing Classification audio_recognition.md ### Data Representation -wide.md -wide_and_deep.md +wide.md: Linear Models +wide_and_deep.md: Wide & Deep 
Learning word2vec.md -kernel_methods.md +kernel_methods.md: Kernel Methods ### Non-ML mandelbrot.md -- GitLab From 7f982862c68febf8c4e9553a5b848ecd570cb808 Mon Sep 17 00:00:00 2001 From: cclauss Date: Thu, 8 Feb 2018 01:28:06 +0100 Subject: [PATCH 0055/2939] Fix undefined name: import as_str_any for line 35 (#16668) flake8 testing of https://github.com/tensorflow/tensorflow on Python 2.7.14 $ __flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics__ ``` ./tensorflow/python/util/compat_internal.py:33:12: F821 undefined name 'as_str_any' path = as_str_any(path.__fspath__()) ^ ``` --- tensorflow/python/util/compat_internal.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/util/compat_internal.py b/tensorflow/python/util/compat_internal.py index a299b2fc3c..9e60e689d2 100644 --- a/tensorflow/python/util/compat_internal.py +++ b/tensorflow/python/util/compat_internal.py @@ -19,6 +19,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.util.compat import as_str_any + def path_to_str(path): """Returns the file system path representation of a `PathLike` object, else as it is. -- GitLab From 43ecf848478940904e1a2df10df6bfe72163a38d Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Mon, 12 Feb 2018 13:00:11 -0800 Subject: [PATCH 0056/2939] Revert "Add checkpoint file prefix check (#14341)" This reverts commit 2a16133061ba3f8fa60c0338cd629f2211f9b17d. 
--- .../contrib/slim/python/slim/evaluation_test.py | 2 +- tensorflow/python/training/saver.py | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py index f5a9299d26..870f504d10 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation_test.py +++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py @@ -236,7 +236,7 @@ class SingleEvaluationTest(test.TestCase): def _prepareCheckpoint(self, checkpoint_path): init_op = control_flow_ops.group(variables.global_variables_initializer(), variables.local_variables_initializer()) - saver = saver_lib.Saver() + saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V1) with self.test_session() as sess: sess.run(init_op) saver.save(sess, checkpoint_path) diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index 764f840012..3888e9bba4 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -1597,9 +1597,9 @@ class Saver(object): [Stripping Default-Valued Attributes](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/README.md#stripping-default-valued-attributes). Returns: - A string: path prefix used for the checkpoint files. If checkpoint - format is V1 and the saver is sharded, this string ends with: - '-?????-of-nnnnn' where 'nnnnn' is the number of shards created. + A string: path prefix used for the checkpoint files. If the saver is + sharded, this string ends with: '-?????-of-nnnnn' where 'nnnnn' + is the number of shards created. If the saver is empty, returns None. Raises: @@ -1749,11 +1749,6 @@ class Saver(object): return if save_path is None: raise ValueError("Can't load save_path when it is None.") - if (os.path.isfile(save_path) and - self._write_version != saver_pb2.SaverDef.V1): - raise ValueError("The specified path: %s is a file." 
- " Please specify only the path prefix" - " to the checkpoint files." % save_path) logging.info("Restoring parameters from %s", save_path) if context.in_graph_mode(): sess.run(self.saver_def.restore_op_name, -- GitLab From 9d8994ad0f8ba11911ab08d6d755abec40cfb84f Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 9 Feb 2018 14:37:24 -0800 Subject: [PATCH 0057/2939] Remove header dependence on cuda_config.h to fix opensource custom op support. Fixes #14454 Fixes #12860 PiperOrigin-RevId: 185194924 --- tensorflow/core/common_runtime/gpu/gpu_device.cc | 4 ++++ tensorflow/stream_executor/dso_loader.cc | 4 ++++ tensorflow/stream_executor/dso_loader.h | 4 ---- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index 80a5bdbfff..dc92da7738 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -65,6 +65,10 @@ limitations under the License. #include "tensorflow/core/util/env_var.h" #include "tensorflow/core/util/stream_executor_util.h" +#if !defined(PLATFORM_GOOGLE) +#include "cuda/cuda_config.h" +#endif + namespace tensorflow { // Eigen Ops directly allocate memory only for temporary buffers used diff --git a/tensorflow/stream_executor/dso_loader.cc b/tensorflow/stream_executor/dso_loader.cc index d71938634d..d9fe301b8f 100644 --- a/tensorflow/stream_executor/dso_loader.cc +++ b/tensorflow/stream_executor/dso_loader.cc @@ -33,6 +33,10 @@ limitations under the License. 
#include "tensorflow/stream_executor/platform/logging.h" #include "tensorflow/stream_executor/platform/port.h" +#if !defined(PLATFORM_GOOGLE) +#include "cuda/cuda_config.h" +#endif + namespace perftools { namespace gputools { namespace internal { diff --git a/tensorflow/stream_executor/dso_loader.h b/tensorflow/stream_executor/dso_loader.h index 9495f7253a..354c7b50b8 100644 --- a/tensorflow/stream_executor/dso_loader.h +++ b/tensorflow/stream_executor/dso_loader.h @@ -28,10 +28,6 @@ limitations under the License. #include "tensorflow/stream_executor/platform.h" #include "tensorflow/stream_executor/platform/mutex.h" -#if !defined(PLATFORM_GOOGLE) -#include "cuda/cuda_config.h" -#endif - namespace perftools { namespace gputools { namespace internal { -- GitLab From 0b8492b612eef6057440c4d1fe5dca41cacf5d6d Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 12 Feb 2018 18:40:07 -0800 Subject: [PATCH 0058/2939] Debugging calibration --- .../contrib/tensorrt/convert/convert_graph.cc | 28 +++- .../contrib/tensorrt/convert/convert_graph.h | 5 +- .../contrib/tensorrt/convert/convert_nodes.cc | 147 +++++++++++++++++- .../contrib/tensorrt/convert/convert_nodes.h | 8 +- .../contrib/tensorrt/kernels/trt_calib_op.cc | 22 +-- .../contrib/tensorrt/python/__init__.py | 1 + .../contrib/tensorrt/python/trt_convert.py | 20 ++- .../tensorrt/resources/TRTInt8Calibrator.cc | 65 +++++++- .../tensorrt/resources/TRTInt8Calibrator.h | 9 +- tensorflow/contrib/tensorrt/trt_conversion.i | 135 ++++++++++------ 10 files changed, 363 insertions(+), 77 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 494920fb7c..8aa4e42fa6 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -216,11 +216,11 @@ tensorflow::Status GetCalibNode(ConvertGraphParams* params) { TF_RETURN_IF_ERROR(status); for (auto in_edge: params->subgraph_incoming_edges) 
{ // loop over incoming edges and attach them to calib node - tensorflow::Node* src_node = in_edge->src(); + // tensorflow::Node* src_node = in_edge->src(); auto src_output=in_edge->src_output(); auto dst_node=in_edge->dst(); auto dst_input=in_edge->dst_input(); - VLOG(0)<<" update edge "<name()<<":"< "<name()<<":"<name()<<":"< "<name()<<":"<graph.UpdateEdge(trt_node, src_output, dst_node, dst_input); } @@ -330,6 +330,30 @@ tensorflow::Status BuildNodeMap( } } // namespace +tensorflow::Status ConvertCalibGraphToInferGraph( + const tensorflow::GraphDef& graph_def, + tensorflow::GraphDef* infer_graph){ + VLOG(0)<<"Starting Calib Conversion"; + tensorflow::Graph graph(tensorflow::OpRegistry::Global()); + TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( + tensorflow::GraphConstructorOptions(), graph_def, &graph)); + // get calib nodes + std::vector calibNodes; + for(auto node : graph.op_nodes()){ + if(node->type_string()=="TRTCalibOp"){ + VLOG(1)<<"Found Calib Node"; + calibNodes.push_back(node); + } + } + VLOG(0)<<"Num Calib nodes in graph= "<& output_names, size_t max_batch_size, diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 4e70fb00f9..588cecf8dd 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -40,6 +40,7 @@ limitations under the License. 
#include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/logging.h" #define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1) @@ -299,6 +300,11 @@ std::vector TFAttrs::get>(std::string key) const { return std::vector(attr.begin(), attr.end()); } template <> +std::vector TFAttrs::get>(std::string key) const { + auto attr = this->at(key)->list().s(); + return std::vector(attr.begin(), attr.end()); +} +template <> nvinfer1::Dims TFAttrs::get(std::string key) const { auto values = this->get>(key); nvinfer1::Dims dims; @@ -1938,6 +1944,125 @@ void Converter::register_op_converters() { tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams& s) { return tensorflow::errors::Unimplemented("Not implemented yet"); } +tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph &graph, + tensorflow::Node *c_node) { + const auto ndef=c_node->def(); + + TFAttrs attrs(ndef); + std::vector segment_nodes(attrs.get>("segment_nodes")); + std::vector output_nodes(attrs.get>("segment_output_names")); + std::vector input_names(attrs.get>("input_names")); + std::string res_name = attrs.get("resource_name"); + VLOG(1) << "Node name " << c_node->name() << " res_name " << res_name; + std::string engine_name="my_trt_op"; + { + const auto node_id=tensorflow::str_util::Split(res_name,"_"); + engine_name+=node_id.back(); + } + std::map nodeMaps; + + for(auto n: graph.op_nodes()){ + nodeMaps.insert({n->name(),n}); + } + VLOG(1)<<"Output Nodes:"; + std::vector out_types; + std::vector out_edges; + for(auto &i : output_nodes ){ + auto node_port=tensorflow::str_util::Split(i,":"); + VLOG(1) << " " << i << " in graph " << nodeMaps.count(i); + auto out_node_name = node_port.at(0); + if(node_port.size()>1){ + VLOG(1) << "Multi port output" << node_port.at(0) << + " " << 
node_port.at(1) << " size=" << node_port.size(); + } + auto nodeIt=nodeMaps.find(out_node_name); + if(nodeIt!=nodeMaps.end()){ + tensorflow::Node* outNode=nodeIt->second; + int port=0; + if(node_port.size()==2){ + port=std::strtoul(node_port.at(1).c_str(),nullptr,10); + out_types.push_back(outNode->output_type(port)); + }else{ + out_types.push_back(outNode->output_type(0)); + } + for(auto outEdge : outNode->out_edges()){ + if(outEdge->src_output()==port){ + out_edges.push_back(outEdge); + break; + } + } + }else{ + LOG(WARNING)<<" couldn't find output node "<getManager("TRTCalibOps"); + tensorflow::trt::TRTCalibrationResource* calibRes = nullptr; + auto status = resmgr->Lookup(res_name, res_name, &calibRes); + if(!status.ok() || !calibRes->calibrator){ + return tensorflow::errors::FailedPrecondition("You must run calibration"\ + " and inference conversion in the same proces"); + } + + calibRes->calibrator->setDone(); + VLOG(1)<<"Waiting for calibration thread to join"; + calibRes->thr->join(); + delete calibRes->thr; + if(!calibRes->engine){ + LOG(FATAL)<<"Calibration failed!, engine is nullptr"; + } + auto engine_plan_string=calibRes->engine->serialize(); + calibRes->engine->destroy(); + calibRes->network->destroy(); + calibRes->builder->destroy(); + calibRes->thr= nullptr; + calibRes->engine= nullptr; + calibRes->builder= nullptr; + tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); + std::vector income_edges; + for(const auto in_edge : c_node->in_edges()){ + auto src=in_edge->src(); + int dest_port=in_edge->dst_input(); + income_edges.emplace_back(src->name(),in_edge->src_output(),c_node->input_type(dest_port)); + } + tensorflow::gtl::ArraySlice input_list( + income_edges); + op_builder.Input(input_list); + tensorflow::NodeDef engine_node; + status = op_builder.Attr("serialized_engine", engine_plan_string) + .Attr("input_nodes", input_names) + .Attr("output_nodes", output_nodes) + .Attr("OutT", out_types) + .Finalize(&engine_node); + 
if(!status.ok()){ + LOG(ERROR)<<"Engine Node creation failed"; + return status; + } + auto trt_engine_node=graph.AddNode(engine_node,&status); + TF_CHECK_OK(status); + for(size_t i=0;idst()->name() << " port " + << out_edges.at(i)->dst_input(); + TF_RETURN_IF_ERROR(graph.UpdateEdge(trt_engine_node, i, + out_edges.at(i)->dst(), + out_edges.at(i)->dst_input())); + } + VLOG(1) << "Segment nodes:"; + for (auto &i : segment_nodes){ + VLOG(1) << " " << i << " in graph " << nodeMaps.count(i); + auto it=nodeMaps.find(i); + if(it!=nodeMaps.end()){ + graph.RemoveNode(it->second); + } + } + return tensorflow::Status::OK(); +} tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { // Visit nodes in reverse topological order and construct the TRT network. @@ -1958,13 +2083,15 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { LOG(DEBUG) << "BUILDING 1"; static int static_id = 0; std::string calib_op_name = - std::string("my_trt_calib_op_") + std::to_string(static_id++); - + std::string("my_trt_calib_op_") + std::to_string(static_id); + std::string engine_name = + std::string("my_trt_op") + std::to_string(static_id); + static_id++; LOG(DEBUG) << "BUILDING 2"; auto trt_rmgr = tensorflow::trt::TRTResourceManager::instance(); auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); auto op_res = new tensorflow::trt::TRTCalibrationResource(); - VLOG(0)<<"SAMI Creating calibresource "<Create(calib_op_name, calib_op_name, op_res)); op_res->logger = new tensorflow::tensorrt::Logger(); op_res->builder = nvinfer1::createInferBuilder(*(op_res->logger)); @@ -2065,15 +2192,23 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { // Gather output metadata std::vector output_names; std::vector output_dtypes; + int trt_engine_op_output_idx = 0; for (std::pair const& output : s.output_inds) { int node_id = output.first; int output_idx = output.second; tensorflow::Node* node = s.graph.FindNodeId(node_id); 
std::string op_name = node->name(); std::string tensor_name = op_name; + + s.output_edge_map->insert( + {trt_engine_op_output_idx == 0 + ? engine_name + : engine_name + ":" + std::to_string(trt_engine_op_output_idx), + {output_idx, tensor_name}}); + trt_engine_op_output_idx++; if (output_idx != 0) tensor_name = tensor_name + ":" + std::to_string(output_idx); - LOG(DEBUG) << "output tensor name: " << tensor_name; + VLOG(1) << "output tensor name: " << tensor_name; output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); if (!tensor_or_weights.is_tensor()) { @@ -2083,7 +2218,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); if (!tensor) { return tensorflow::errors::NotFound("Output tensor not found: " + - tensor_name); + tensor_name); } converter.network()->markOutput(*tensor); tensorflow::DataType tf_dtype = node->output_type(output_idx); @@ -2109,7 +2244,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { // ConvertSubGraphToTensorRT(convert_graph.cc) auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( input_names.at(i), output_idx, input_dtypes.at(i)); - VLOG(0) << calib_op_name << " input " << i << " = " << input_names.at(i) + VLOG(1) << calib_op_name << " input " << i << " = " << input_names.at(i) << ":" << output_idx <<" dType= "<< tensorflow::DataTypeString(input_dtypes.at(i)); income_edges.push_back(incoming_edge); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 2f754968dc..71f61e2dc4 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -31,7 +31,7 @@ namespace tensorrt { namespace convert { struct SubGraphParams { - SubGraphParams(const tensorflow::Graph& graph_, + SubGraphParams(tensorflow::Graph& graph_, const std::set& subgraph_node_ids_, 
const std::vector>& input_inds_, const std::vector>& output_inds_, @@ -52,7 +52,7 @@ struct SubGraphParams { trt_node(trt_node_), int8(int8_) {} - const tensorflow::Graph& graph; + tensorflow::Graph& graph; const std::set& subgraph_node_ids; const std::vector>& input_inds; // {node_id, output_idx} const std::vector>& output_inds; // {node_id, output_idx} @@ -64,8 +64,10 @@ struct SubGraphParams { const bool int8; }; -tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams& params); +tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams ¶ms); tensorflow::Status InjectCalibrationNode(SubGraphParams& params); +tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph& graph, + tensorflow::Node* c_node); } // namespace convert } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc index 4996b3cd40..41906b6090 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -43,23 +43,22 @@ TRTCalibOp::TRTCalibOp(OpKernelConstruction* context) : OpKernel(context) { } void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { auto trt_rm = tensorflow::trt::TRTResourceManager::instance(); - VLOG(0) << "Op Name= " << name() << " nodedef name= " << repo_name; + VLOG(2) << "Op Name= " << name() << " nodedef name= " << repo_name; auto resmgr = trt_rm->getManager("TRTCalibOps"); tensorflow::trt::TRTCalibrationResource* calibRes = nullptr; auto status = resmgr->Lookup(repo_name, repo_name, &calibRes); - VLOG(0) << "SAMI status " << status.ToString(); if (status.ok()) { int batchSize = ctx->input(0).dim_size(0); - VLOG(0) << "SAMI Batchsize= " << batchSize; + VLOG(2) << "SAMI Batchsize= " << batchSize; int numInputs = ctx->num_inputs(); - VLOG(0) << "SAMI numInputs= " << numInputs; + VLOG(2) << "SAMI numInputs= " << numInputs; dev_tensors_.resize(numInputs); if (calibRes->calibrator == 
nullptr) { - VLOG(0) << " Constructing calibrator"; + VLOG(1) << " Constructing calibrator"; // first run for (int i = 0; i < numInputs; i++) { const tensorflow::Tensor& t = ctx->input(i); - VLOG(0) << "Tensor " << i << " " << t.shape().DebugString(); + VLOG(1) << "Tensor " << i << " " << t.shape().DebugString(); OP_REQUIRES_OK(ctx, ctx->allocate_persistent(t.dtype(), t.shape(), &dev_tensors_.at(i), nullptr)); @@ -73,11 +72,14 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { } calibRes->calibrator = new TRTInt8Calibrator(device_buffers_, batchSize); calibRes->thr = new std::thread([calibRes]() { + VLOG(0)<<"Starting calibration thread, Calibration Resource @ "<builder->setInt8Calibrator(calibRes->calibrator); + calibRes->builder->setInt8Mode(true); calibRes->engine = calibRes->builder->buildCudaEngine( *calibRes->network); // will loop until we terminate calibrator - VLOG(1) << "Calibration loop terminated"; + VLOG(0) << "SAMI Calibration loop terminated"; }); - VLOG(0) << "SAMI intialized calibrator resource"; + VLOG(0) << "SAMI initialized calibrator resource"; } std::unordered_map input_data; @@ -92,9 +94,9 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { input_data.emplace(input_names_.at(i), data_address); ctx->set_output(i, t); } - VLOG(0) << "Filled map"; + VLOG(1) << "Filled map for sending"; calibRes->calibrator->setBatch(input_data); - VLOG(0) << "Passed calibration data"; + VLOG(1) << "Passed calibration data"; } else { ctx->SetStatus(status); return; diff --git a/tensorflow/contrib/tensorrt/python/__init__.py b/tensorflow/contrib/tensorrt/python/__init__.py index 4aeea48515..9eb589664c 100644 --- a/tensorflow/contrib/tensorrt/python/__init__.py +++ b/tensorflow/contrib/tensorrt/python/__init__.py @@ -5,4 +5,5 @@ from __future__ import print_function # pylint: disable=unused-import,wildcard-import from tensorflow.contrib.tensorrt.python.ops import trt_engine_op from tensorflow.contrib.tensorrt.python.trt_convert import 
CreateInferenceGraph +from tensorflow.contrib.tensorrt.python.trt_convert import CalibGraphToInferGraph # pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 5aba371a03..18ea6c83cc 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -21,7 +21,7 @@ from __future__ import print_function from tensorflow.core.framework import graph_pb2 from tensorflow.python.framework import errors from tensorflow.python.framework import errors_impl as _impl -from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert +from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert,calib_convert from tensorflow.python.util import compat import tensorflow as tf from tensorflow.python.grappler import tf_optimizer @@ -91,3 +91,21 @@ def CreateInferenceGraph(input_graph_def, outputs,max_batch_size=1,max_workspace output_graph_def.ParseFromString(output_graph_def_string) del output_graph_def_string #save some memory return output_graph_def + +def CalibGraphToInferGraph(calibration_graph_def): + graph_str=calibration_graph_def.SerializeToString() + out=calib_convert(graph_str) + status=out[0] + output_graph_def_string = out[1] + del graph_str #save some memory + if len(status) < 2: + raise _impl.UnknownError(None,None,status) + if status[:2] != "OK": + msg=status.split(";") + if len(msg) == 1: + raise RuntimeError("Status message is malformed {}".format(status)) + raise _impl._make_specific_exception(None,None,";".join(msg[1:]), int(msg[0])) + output_graph_def = graph_pb2.GraphDef() + output_graph_def.ParseFromString(output_graph_def_string) + del output_graph_def_string #save some memory + return output_graph_def diff --git a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc index 10d9350d7a..e1ab243b07 100644 --- 
a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc @@ -5,6 +5,10 @@ #include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" #include +#include +#include +#include + #include "tensorflow/core/platform/logging.h" namespace tensorflow { @@ -12,26 +16,67 @@ namespace trt { // set the batch size before constructing the thread to execute engine int TRTInt8Calibrator::getBatchSize() const { return batch_size_; } +TRTInt8Calibrator::TRTInt8Calibrator(const std::unordered_map< + std::string, std::pair>& dev_buffers, + int batch_size) + : batch_size_(batch_size), + done_(false), + dev_buffers_(dev_buffers), + calib_running_(false){ + cudaPointerAttributes pa; + int devid=-1; + cudaGetDevice(&devid); + VLOG(0)<<"Constructing calibrator with batch size "<& data) { + VLOG(1)<<"SAMI SAMI Waiting to set new batch"; + if(done_)return false; while (calib_running_.load( std::memory_order_acquire)) { // wait while calibration is running tensorflow::mutex_lock l(cond_mtx_); cond_.wait_for(l, std::chrono::milliseconds(50)); + if(done_)return false; } + VLOG(1)<<"Set Batch Waiting finished"; for (const auto it : data) { + auto devptr = dev_buffers_.find(it.first); if (devptr == dev_buffers_.end()) { LOG(FATAL) << "FATAL input name '" << it.first << "' does not match with the buffer names"; } + cudaPointerAttributes pa; const auto& d = devptr->second; + VLOG(1)<<"cuda memcopy buff name= "<second.first; bindings[i] = it->second.first; + float f[2]; + f[0]=3.; + f[1]=0.14159; + auto status=cudaMemcpy(f,bindings[i],sizeof(float)*2,cudaMemcpyDeviceToHost); + int devid=-1; + cudaGetDevice(&devid); + VLOG(0)<<"SAMI ORDER GETTING, Data in perm storage [0]="<>& dev_buffers, - int batch_size) - : batch_size_(batch_size), - done_(false), - dev_buffers_(dev_buffers), - calib_running_(false){}; + int batch_size); int getBatchSize() const; bool getBatch(void* bindings[], const char* names[], int nbBindings) 
override; bool setBatch(const std::unordered_map &data); void setDone(){done_=true;} const void *readCalibrationCache(std::size_t &length) override; void writeCalibrationCache(const void *ptr, std::size_t length) override; + ~TRTInt8Calibrator(); private: int batch_size_; tensorflow::mutex cond_mtx_; tensorflow::condition_variable cond_; bool done_; - std::unordered_map> dev_buffers_; + const std::unordered_map> dev_buffers_; std::atomic_bool calib_running_; }; } // namespace trt diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i index 3e8baf91ae..ee87d7fae1 100644 --- a/tensorflow/contrib/tensorrt/trt_conversion.i +++ b/tensorflow/contrib/tensorrt/trt_conversion.i @@ -23,58 +23,98 @@ %ignoreall %unignore tensorflow; %unignore trt_convert; +%unignore calib_convert; %{ - std::pair trt_convert(string graph_def_string,//const tensorflow::GraphDef& - std::vector output_names, - size_t max_batch_size, - size_t max_workspace_size_bytes, - bool int8 - // unfortunately we can't use TF_Status here since it - // is in c/c_api and brings in a lot of other libraries - // which in turn declare ops. These ops are included - // statically in our library and cause an abort when - // module is loaded due to double registration - // until Tensorflow properly exposes these headers - // we have to work around this by returning a string - // and converting it to exception on python side. - //,TF_Status* out_status) { - ) { - string out_status; +std::pair trt_convert(string graph_def_string,//const tensorflow::GraphDef& + std::vector output_names, + size_t max_batch_size, + size_t max_workspace_size_bytes, + bool int8 + // unfortunately we can't use TF_Status here since it + // is in c/c_api and brings in a lot of other libraries + // which in turn declare ops. 
These ops are included + // statically in our library and cause an abort when + // module is loaded due to double registration + // until Tensorflow properly exposes these headers + // we have to work around this by returning a string + // and converting it to exception on python side. + //,TF_Status* out_status) { +) { + string out_status; - tensorflow::GraphDef graph_def; - if (!graph_def.ParseFromString(graph_def_string)) { - out_status="InvalidArgument;Couldn't interpret input as a GraphDef"; - return std::pair{out_status,""}; - } + tensorflow::GraphDef graph_def; + if (!graph_def.ParseFromString(graph_def_string)) { + out_status="InvalidArgument;Couldn't interpret input as a GraphDef"; + return std::pair{out_status,""}; + } - if (!output_names.size()) { - out_status="InvalidArgument;Size of the output_names vector is 0"; - return std::pair{out_status,""}; - //return ""; - } - tensorflow::GraphDef outGraph; - tensorflow::Status conversion_status = + if (!output_names.size()) { + out_status="InvalidArgument;Size of the output_names vector is 0"; + return std::pair{out_status,""}; + //return ""; + } + tensorflow::GraphDef outGraph; + tensorflow::Status conversion_status = tensorrt::convert::ConvertGraphDefToTensorRT(graph_def, - output_names, - max_batch_size, - max_workspace_size_bytes, - &outGraph,int8); - if (!conversion_status.ok()) { - auto retCode=(int)conversion_status.code(); - char buff[2000]; - snprintf(buff,2000,"%d;%s",retCode,conversion_status.error_message().c_str()); - out_status=buff; - return std::pair{out_status,""}; - } - string result; - if (!outGraph.SerializeToString(&result)) { - out_status="InvalidArgument;Couldn't serialize output as a GraphDef"; - return std::pair{out_status,""}; - } - out_status="OK;All good!"; - return std::pair{out_status,result}; + output_names, + max_batch_size, + max_workspace_size_bytes, + &outGraph,int8); + if (!conversion_status.ok()) { + auto retCode=(int)conversion_status.code(); + char buff[2000]; + 
snprintf(buff,2000,"%d;%s",retCode,conversion_status.error_message().c_str()); + out_status=buff; + return std::pair{out_status,""}; + } + string result; + if (!outGraph.SerializeToString(&result)) { + out_status="InvalidArgument;Couldn't serialize output as a GraphDef"; + return std::pair{out_status,""}; + } + out_status="OK;All good!"; + return std::pair{out_status,result}; +} + +std::pair calib_convert(string graph_def_string // const tensorflow::GraphDef& + // unfortunately we can't use TF_Status here since it + // is in c/c_api and brings in a lot of other libraries + // which in turn declare ops. These ops are included + // statically in our library and cause an abort when + // module is loaded due to double registration + // until Tensorflow properly exposes these headers + // we have to work around this by returning a string + // and converting it to exception on python side. + //,TF_Status* out_status) { +) { + string out_status; + + tensorflow::GraphDef graph_def; + if (!graph_def.ParseFromString(graph_def_string)) { + out_status="InvalidArgument;Couldn't interpret input as a GraphDef"; + return std::pair{out_status,""}; } + + tensorflow::GraphDef outGraph; + tensorflow::Status conversion_status = + tensorrt::convert::ConvertCalibGraphToInferGraph(graph_def, + &outGraph); + if (!conversion_status.ok()) { + auto retCode=(int)conversion_status.code(); + char buff[2000]; + snprintf(buff,2000,"%d;%s",retCode,conversion_status.error_message().c_str()); + out_status=buff; + return std::pair{out_status,""}; + } + string result; + if (!outGraph.SerializeToString(&result)) { + out_status="InvalidArgument;Couldn't serialize output as a GraphDef"; + return std::pair{out_status,""}; + } + out_status="OK;All good!"; + return std::pair{out_status,result}; +} %} std::pair trt_convert(string graph_def_string, @@ -82,4 +122,7 @@ std::pair trt_convert(string graph_def_string, size_t max_batch_size, size_t max_workspace_size,bool int8); +std::pair calib_convert(string 
graph_def_string); + + %unignoreall -- GitLab From b6c2273f0a7735b9850080eefd26b6056b4d5f42 Mon Sep 17 00:00:00 2001 From: Andrew Harp Date: Wed, 7 Feb 2018 17:19:11 -0500 Subject: [PATCH 0059/2939] Bump JetPack default to 3.2 in Android build script (#16842) --- tensorflow/contrib/makefile/build_all_android.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/makefile/build_all_android.sh b/tensorflow/contrib/makefile/build_all_android.sh index f67c516186..fc88f59e09 100755 --- a/tensorflow/contrib/makefile/build_all_android.sh +++ b/tensorflow/contrib/makefile/build_all_android.sh @@ -52,7 +52,7 @@ shift $((OPTIND - 1)) if [ "$ARCH" == "tegra" ]; then if [[ -z "${JETPACK}" ]]; then - export JETPACK="$HOME/JetPack_Android_3.0" + export JETPACK="$HOME/JetPack_Android_3.2" fi if [ ! -d ${JETPACK} ]; then echo "Can't find Jetpack at ${JETPACK}" -- GitLab From 22ff574ee9d4272995ddcc6aaac70479b7e6e17c Mon Sep 17 00:00:00 2001 From: Andrew Harp Date: Tue, 13 Feb 2018 18:20:07 -0500 Subject: [PATCH 0060/2939] Add instructions for building CUDA-enabled Android TensorFlow (#16961) * Add instructions for building CUDA-enabled Android TensorFlow * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md * Update README.md --- tensorflow/contrib/android/README.md | 5 ++ tensorflow/contrib/makefile/README.md | 99 +++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/tensorflow/contrib/android/README.md b/tensorflow/contrib/android/README.md index b8d73bf24c..db37bcf73d 100644 --- a/tensorflow/contrib/android/README.md +++ b/tensorflow/contrib/android/README.md @@ -81,6 +81,11 @@ For documentation on building a self-contained AAR file with cmake, see [tensorflow/contrib/android/cmake](cmake). 
+### Makefile + +For documentation on building native TF libraries with make, including a CUDA-enabled variant for devices like the Nvidia Shield TV, see [tensorflow/contrib/makefile/README.md](../makefile/README.md) + + ## AssetManagerFileSystem This directory also contains a TensorFlow filesystem supporting the Android diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 0613de2cab..9758ee1c47 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -130,6 +130,105 @@ adb shell '/data/local/tmp/benchmark \ For more details, see the [benchmark documentation](../../tools/benchmark). +## CUDA support for Tegra devices running Android (Nvidia Shield TV, etc) + +With the release of TF 1.6 and JetPack for Android 3.2 (currently pending), you can now build a version of TensorFlow for compatible devices according to the following instructions which will receive the full benefits of GPU acceleration. + +#### Environment setup: + +First, download and install JetPack for Android version 3.2 or greater from [Nvidia](https://developers.nvidia.com). Note that as of the TF 1.6 release the JetPack for Android 3.2 release is still pending, and regular JetPack for L4T will not work. + +```bash +git clone https://github.com/tensorflow/tensorflow.git +cd tensorflow +JETPACK=$HOME/JetPack_Android_3.2 +TEGRA_LIBS="$JETPACK/cuDNN/aarch64/cuda/lib64/libcudnn.so $JETPACK/cuda-9.0/extras/CUPTI/lib64/libcupti.so $JETPACK/cuda/targets/aarch64-linux-androideabi/lib64/libcufft.so" +``` + +#### Building all CUDA-enabled native binaries: +This will build CUDA-enabled versions of libtensorflow_inference.so and the benchmark binary. 
(libtensorflow_demo.so will also be built incidentally, but it does not support CUDA) + +```bash +NDK_ROOT=$JETPACK/android-ndk-r13b +CC_PREFIX=ccache tensorflow/contrib/makefile/build_all_android.sh -s tensorflow/contrib/makefile/sub_makefiles/android/Makefile.in -t "libtensorflow_inference.so libtensorflow_demo.so all" -a tegra +``` +(add -T on subsequent builds to skip protobuf downloading/building) + + +#### Testing the CUDA-enabled benchmark via adb: +Build binaries first as above, then run: + +```bash +adb shell mkdir -p /data/local/tmp/lib64 +adb push $TEGRA_LIBS /data/local/tmp/lib64 +adb push tensorflow/contrib/makefile/gen/bin/android_arm64-v8a/benchmark /data/local/tmp +wget https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/tensorflow_demo.apk +unzip tensorflow_demo.apk -d /tmp/tensorflow_demo +adb push /tmp/tensorflow_demo/assets/*.pb /data/local/tmp +adb shell "LD_LIBRARY_PATH=/data/local/tmp/lib64 /data/local/tmp/benchmark --graph=/data/local/tmp/tensorflow_inception_graph.pb" +``` + +#### Building the CUDA-enabled TensorFlow AAR with Bazel: +Build the native binaries first as above.
Then, build the aar and package the native libs by executing the following: +```bash +mkdir -p /tmp/tf/jni/arm64-v8a +cp tensorflow/contrib/makefile/gen/lib/android_tegra/libtensorflow_*.so /tmp/tf/jni/arm64-v8a/ +cp $TEGRA_LIBS /tmp/tf/jni/arm64-v8a +bazel build //tensorflow/contrib/android:android_tensorflow_inference_java.aar +cp bazel-bin/tensorflow/contrib/android/android_tensorflow_inference_java.aar /tmp/tf/tensorflow.aar +cd /tmp/tf +chmod +w tensorflow.aar +zip -ur tensorflow.aar $(find jni -name *.so) +``` + +#### Building the CUDA-enabled TensorFlow Android demo with Bazel: +Build binaries first as above, then edit tensorflow/examples/android/BUILD and replace: +``` + srcs = [ + ":libtensorflow_demo.so", + "//tensorflow/contrib/android:libtensorflow_inference.so", + ], +``` +with: +``` +srcs = glob(["libs/arm64-v8a/*.so"]), +``` + +Then run: +```bash +# Create dir for native libs +mkdir -p tensorflow/examples/android/libs/arm64-v8a + +# Copy JetPack libs +cp $TEGRA_LIBS tensorflow/examples/android/libs/arm64-v8a + +# Copy native TensorFlow libraries +cp tensorflow/contrib/makefile/gen/lib/android_arm64-v8a/libtensorflow_*.so tensorflow/examples/android/libs/arm64-v8a/ + +# Build APK +bazel build -c opt --fat_apk_cpu=arm64-v8a //tensorflow/examples/android:tensorflow_demo + +# Install +adb install -r -f bazel-bin/tensorflow/examples/android/tensorflow_demo.apk +``` + +#### Building the CUDA-enabled Android demo with gradle/Android Studio: + +Add tensorflow/examples/android as an Android project in Android Studio as normal. + +Edit build.gradle and: +* set nativeBuildSystem = 'makefile' +* set cpuType = 'arm64-v8a' +* in "buildNativeMake", replace cpuType with 'tegra' (optional speedups like -T and ccache also work) +* set the environment "NDK_ROOT" var to $JETPACK/android-ndk-r13b + +Click "build apk" to build.
+ +Install: +```bash +adb install -r -f tensorflow/examples/android/gradleBuild/outputs/apk/debug/android-debug.apk +``` + ## iOS _Note: To use this library in an iOS application, see related instructions in -- GitLab From ecec1d8c8e557656e7e5c4034604ca6f20d2d3c2 Mon Sep 17 00:00:00 2001 From: Andrew Harp Date: Tue, 13 Feb 2018 18:49:44 -0500 Subject: [PATCH 0061/2939] Update RELEASE.md --- RELEASE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE.md b/RELEASE.md index de4a34bb04..1a037ce595 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -8,6 +8,7 @@ * New Optimizer internal API for non-slot variables. Descendants of AdamOptimizer that access _beta[12]_power will need to be updated. * `tf.estimator.{FinalExporter,LatestExporter}` now export stripped SavedModels. This improves forward compatibility of the SavedModel. * FFT support added to XLA CPU/GPU. +* Android TF can now be built with CUDA acceleration on compatible Tegra devices (see [tensorflow/contrib/makefile/README.md](tensorflow/contrib/makefile/README.md) for more information). ## Bug Fixes and Other Changes * Documentation updates: -- GitLab From ca19b32e4d1574ad29e36dbc164c320aeca80d47 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Wed, 14 Feb 2018 00:13:00 -0800 Subject: [PATCH 0062/2939] cifar 10 divergence fix and batchnorm unit test fix --- .../core/kernels/mkl_fused_batch_norm_op.cc | 96 +++++++++++++------ tensorflow/core/kernels/mkl_relu_op.cc | 20 +++- 2 files changed, 81 insertions(+), 35 deletions(-) diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index 8313224d7f..b7dee3fb3e 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -1110,19 +1110,12 @@ class MklFusedBatchNormGradOp : public OpKernel { return; } - if (dnn_shape_src.IsMklTensor()) - depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C); - else - ExtractParams(context); - - memory::format format_m; if
(dnn_shape_src.IsMklTensor()) { - if (dnn_shape_src.IsTensorInNCHWFormat()) - format_m = memory::format::nchw; - else - format_m = memory::format::nhwc; + depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C); + } else if (dnn_shape_diff_dst.IsMklTensor()) { + depth_ = dnn_shape_diff_dst.DimSize(MklDnnDims::Dim_C); } else { - format_m = TFDataFormatToMklDnnDataFormat(tensor_format_); + ExtractParams(context); } MklDnnData src(&cpu_engine); @@ -1146,20 +1139,20 @@ class MklFusedBatchNormGradOp : public OpKernel { diff_dst_dims = TFShapeToMklDnnDimsInNCHW(diff_dst_tensor.shape(), tensor_format_); - // set src and diff_dst primitives + // set src and diff_dst primitives according to input layout memory::desc src_md({}, memory::data_undef, memory::format_undef); memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef); - if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) { - if (dnn_shape_src.IsMklTensor()) { - src_md = dnn_shape_src.GetMklLayout(); - diff_dst_md = src_md; - } else { - diff_dst_md = dnn_shape_diff_dst.GetMklLayout(); - src_md = diff_dst_md; - } + if (dnn_shape_src.IsMklTensor()) { + src_md = dnn_shape_src.GetMklLayout(); } else { - src_md = memory::desc(src_dims, MklDnnType(), format_m); - diff_dst_md = src_md; + src_md = memory::desc(src_dims, MklDnnType(), + TFDataFormatToMklDnnDataFormat(tensor_format_)); + } + if (dnn_shape_diff_dst.IsMklTensor()) { + diff_dst_md = dnn_shape_diff_dst.GetMklLayout(); + } else { + diff_dst_md = memory::desc(diff_dst_dims, MklDnnType(), + TFDataFormatToMklDnnDataFormat(tensor_format_)); } src.SetUsrMem(src_md, &src_tensor); diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); @@ -1211,28 +1204,64 @@ class MklFusedBatchNormGradOp : public OpKernel { // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; TensorShape tf_shape_diff_src; - if (dnn_shape_src.IsMklTensor()) { + + // MKL-DNN's BN primitive not provide API to fetch internal format + // set common_md as OpMem + // src and diff_dst 
will reorder to common_md + // diff_src will set as common_md + memory::desc common_md({}, memory::data_undef, memory::format_undef); + if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) { + if (dnn_shape_src.IsMklTensor()) { + common_md = dnn_shape_src.GetMklLayout(); + } else { + common_md = dnn_shape_diff_dst.GetMklLayout(); + } + } else { + common_md = memory::desc(src_dims, MklDnnType(), + TFDataFormatToMklDnnDataFormat(tensor_format_)); + } + // if any of src and diff_dst as mkl layout, + // then we set diff_src as mkl layout + if (dnn_shape_src.IsMklTensor() || + dnn_shape_diff_dst.IsMklTensor()) { dnn_shape_diff_src.SetMklTensor(true); - auto diff_src_pd = bnrm_fwd_pd.dst_primitive_desc(); + // set diff_src's mkl layout as common_md + auto diff_src_pd = memory::primitive_desc(common_md, cpu_engine); dnn_shape_diff_src.SetMklLayout(&diff_src_pd); dnn_shape_diff_src.SetElemType(MklDnnType()); - dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), src_dims, - format_m); - dnn_shape_diff_src.SetTfDimOrder(dnn_shape_src.GetDimension(), - tensor_format_); + if (dnn_shape_src.IsMklTensor()) { + dnn_shape_diff_src.SetTfLayout( + dnn_shape_src.GetDimension(), + src_dims, + dnn_shape_src.GetTfDataFormat()); + dnn_shape_diff_src.SetTfDimOrder( + dnn_shape_src.GetDimension(), + tensor_format_); + } else { + dnn_shape_diff_src.SetTfLayout( + dnn_shape_diff_dst.GetDimension(), + src_dims, + dnn_shape_diff_dst.GetTfDataFormat()); + dnn_shape_diff_src.SetTfDimOrder( + dnn_shape_diff_dst.GetDimension(), + tensor_format_); + } tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); + // both src and diff_dst are tf layout, + // so get tf shape from anyont should be ok tf_shape_diff_src = src_tensor.shape(); } AllocateOutputSetMklShape(context, kDiffSrcIndex, &diff_src_tensor, tf_shape_diff_src, dnn_shape_diff_src); - diff_src.SetUsrMem(src_md, diff_src_tensor); + // set diff_src + 
diff_src.SetUsrMem(common_md, diff_src_tensor); prop_kind pk = prop_kind::backward; auto bnrm_bwd_desc = batch_normalization_backward::desc( - pk, diff_src.GetUsrMemDesc(), src.GetUsrMemDesc(), epsilon_, + pk, common_md, common_md, epsilon_, /* for inference, specify use_global_stats 1. on fwd prop, use mean and variance provided as inputs @@ -1245,11 +1274,16 @@ class MklFusedBatchNormGradOp : public OpKernel { auto bnrm_bwd_pd = batch_normalization_backward::primitive_desc( bnrm_bwd_desc, cpu_engine, bnrm_fwd_pd); + std::vector net; + src.CheckReorderToOpMem(memory::primitive_desc(common_md, + cpu_engine), &net); + diff_dst.CheckReorderToOpMem(memory::primitive_desc(common_md, + cpu_engine), &net); + auto bnrm_bwd_op = batch_normalization_backward( bnrm_bwd_pd, src.GetOpMem(), mean.GetOpMem(), variance.GetOpMem(), diff_dst.GetOpMem(), weights_m, diff_src.GetOpMem(), diff_weights_m); - std::vector net; net.push_back(bnrm_bwd_op); stream(stream::kind::eager).submit(net).wait(); diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 51db3991e2..924b9da7e0 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -368,8 +368,11 @@ void MklReluGradOp::Compute(OpKernelContext* context) { mkl_context.MklCleanup(); } + + #else // INTEL_MKL_ML + template class MklReluOpBase : public OpKernel { public: @@ -579,17 +582,26 @@ class MklReluGradOpBase : public OpKernel { // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; TensorShape tf_shape_diff_src; - if (dnn_shape_src.IsMklTensor()) { + if (dnn_shape_src.IsMklTensor() || + dnn_shape_diff_dst.IsMklTensor()) { dnn_shape_diff_src.SetMklTensor(true); auto diff_src_pd = relu_bwd_pd.diff_src_primitive_desc(); dnn_shape_diff_src.SetMklLayout(&diff_src_pd); dnn_shape_diff_src.SetElemType(MklDnnType()); - dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), - dnn_shape_src.GetSizesAsMklDnnDims(), - dnn_shape_src.GetTfDataFormat()); 
+ if (dnn_shape_src.IsMklTensor()) { + dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), + dnn_shape_src.GetSizesAsMklDnnDims(), + dnn_shape_src.GetTfDataFormat()); + } else { + dnn_shape_diff_src.SetTfLayout(dnn_shape_diff_dst.GetDimension(), + dnn_shape_diff_dst.GetSizesAsMklDnnDims(), + dnn_shape_diff_dst.GetTfDataFormat()); + } tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); + // both src and diff_dst are tf layout, + // so get tf shape from anyone should be ok tf_shape_diff_src = src_tensor.shape(); } AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor, -- GitLab From 736e8c4ccb16718d11cf7c8e1fac843bf6e388a7 Mon Sep 17 00:00:00 2001 From: ManHyuk Date: Wed, 14 Feb 2018 18:26:20 +0900 Subject: [PATCH 0063/2939] fix typo --- tensorflow/core/lib/io/record_writer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/lib/io/record_writer.cc b/tensorflow/core/lib/io/record_writer.cc index 3657243c5d..ebc5648269 100644 --- a/tensorflow/core/lib/io/record_writer.cc +++ b/tensorflow/core/lib/io/record_writer.cc @@ -49,7 +49,7 @@ RecordWriterOptions RecordWriterOptions::CreateRecordWriterOptions( #endif // IS_SLIM_BUILD } else if (compression_type != compression::kNone) { LOG(ERROR) << "Unsupported compression_type:" << compression_type - << ". No comprression will be used."; + << ". 
No compression will be used."; } return options; } -- GitLab From 617fa4e5fa634270c36a2a8762e6ce96bd38f2f8 Mon Sep 17 00:00:00 2001 From: ManHyuk Date: Wed, 14 Feb 2018 18:35:31 +0900 Subject: [PATCH 0064/2939] fix typo --- tensorflow/contrib/makefile/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index b0228c5435..995230dfa8 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -155,7 +155,7 @@ CC_PREFIX=ccache tensorflow/contrib/makefile/build_all_android.sh -s tensorflow/ (add -T on subsequent builds to skip protobuf downloading/building) -#### Testing the the CUDA-enabled benchmark via adb: +#### Testing the CUDA-enabled benchmark via adb: Build binaries first as above, then run: ```bash -- GitLab From 15f3b920ad7eb7fcca3afee14d16049db2046d4b Mon Sep 17 00:00:00 2001 From: Nathan Luehr Date: Wed, 14 Feb 2018 16:27:23 -0800 Subject: [PATCH 0065/2939] Fix __shared__ types with non-empty constructor std::complex has a non-empty constructor (zero assignment) that is not compatible with CUDA __shared__ memory. This fixes current reliance on undefined behavior. (and removes an unnecessary run-time initialization). 
--- .../core/kernels/reduction_gpu_kernels.cu.h | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h index 15ae4c1fc5..95a3e222b5 100644 --- a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h +++ b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h @@ -244,6 +244,33 @@ __global__ void RowReduceKernel( if (row < num_rows && lane == 0) out[row] = sum; } +template +struct storage_type { + T1 val; + __host__ __device__ storage_type() {} + __host__ __device__ operator T1() { return val; } + __host__ __device__ storage_type& operator=(const T1& in) { + val = in; + return *this; + } +}; + +template +struct storage_type> { + T2 real; + T2 imag; + __host__ __device__ storage_type() {} + __host__ __device__ operator std::complex() { + return std::complex(real, imag); + } + __host__ __device__ storage_type>& operator=( + const std::complex& in) { + real = in.real(); + imag = in.imag(); + return *this; + } +}; + // Works only if there are <= 16 columns // each warps sums over multiple rows at once template @@ -268,7 +295,7 @@ __global__ void ColumnReduceMax16ColumnsKernel( // 1D array necessary due to bug in CUDA 9 compiler. // TODO(nluehr) revert to 2D array when compiler is ready. - __shared__ value_type partial_sums[32 * 33]; + __shared__ storage_type partial_sums[32 * 33]; row += rows_per_warp * gridDim.y * blockDim.y; for (; row < num_rows; row += rows_per_warp * gridDim.y * blockDim.y) { @@ -294,7 +321,8 @@ __global__ void ColumnReduceMax16ColumnsKernel( if (blockDim.y > 1) { for (int row = 1; row < blockDim.y; ++row) { - s = op(s, partial_sums[threadIdx.x * 33 + row]); + value_type t = partial_sums[threadIdx.x * 33 + row]; + s = op(s, t); } } @@ -316,7 +344,7 @@ __global__ void ColumnReduceKernel( // 1D array necessary due to bug in CUDA 9 compiler. // TODO(nluehr) revert to 2D array when compiler is ready. 
- __shared__ value_type partial_sums[32 * 33]; + __shared__ storage_type partial_sums[32 * 33]; row += gridDim.y * blockDim.y; @@ -347,7 +375,8 @@ __global__ void ColumnReduceKernel( min(blockDim.y, num_rows - blockIdx.y * blockDim.y); for (int row = 1; row < numRowsThisBlock; ++row) { - s = op(s, partial_sums[threadIdx.x * 33 + row]); + value_type t = partial_sums[threadIdx.x * 33 + row]; + s = op(s, t); } out[col * gridDim.y + blockIdx.y] = s; -- GitLab From b81aaac898d93e17b4a280bb02547d2a60d490cb Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 15 Feb 2018 08:28:12 +0000 Subject: [PATCH 0066/2939] Fix warnings in tf.contrib.bayesflow.monte_carlo.expectation This fix fixes several warnings in tf.contrib.bayesflow.monte_carlo.expectation by switching to keepdims for tf.reduce_mean. Signed-off-by: Yong Tang --- tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py b/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py index 985177e897..5263e87ae6 100644 --- a/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py +++ b/tensorflow/contrib/bayesflow/python/ops/monte_carlo_impl.py @@ -328,7 +328,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, if not callable(f): raise ValueError('`f` must be a callable function.') if use_reparametrization: - return math_ops.reduce_mean(f(samples), axis=axis, keep_dims=keep_dims) + return math_ops.reduce_mean(f(samples), axis=axis, keepdims=keep_dims) else: if not callable(log_prob): raise ValueError('`log_prob` must be a callable function.') @@ -348,7 +348,7 @@ def expectation(f, samples, log_prob=None, use_reparametrization=True, # "Is there a floating point value of x, for which x-x == 0 is false?" # http://stackoverflow.com/q/2686644 fx += stop(fx) * (logpx - stop(logpx)) # Add zeros_like(logpx). 
- return math_ops.reduce_mean(fx, axis=axis, keep_dims=keep_dims) + return math_ops.reduce_mean(fx, axis=axis, keepdims=keep_dims) def _sample_mean(values): -- GitLab From 3177a76bcb9a2c1166aa9d7e9fbb76d0fef1b6e3 Mon Sep 17 00:00:00 2001 From: Seungil You Date: Thu, 15 Feb 2018 18:34:10 +0900 Subject: [PATCH 0067/2939] Add clean_dep to tf_cc_test. --- tensorflow/tensorflow.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 2ead85d26d..818d67f7b5 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -618,7 +618,7 @@ def tf_cc_test(name, srcs=srcs + tf_binary_additional_srcs(), copts=tf_copts() + extra_copts, linkopts=select({ - "//tensorflow:android": [ + clean_dep("//tensorflow:android"): [ "-pie", ], clean_dep("//tensorflow:windows"): [], -- GitLab From 62b3b9a0d40aacb2c890ed56b831dd2568304f89 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Thu, 15 Feb 2018 13:31:49 -0500 Subject: [PATCH 0068/2939] Docs fix r1.6 (#17038) * add missing blank line PiperOrigin-RevId: 185554969 * fix cuDNN64 dll name --- tensorflow/docs_src/get_started/get_started_for_beginners.md | 4 ++++ tensorflow/docs_src/get_started/premade_estimators.md | 1 + tensorflow/docs_src/install/install_windows.md | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/docs_src/get_started/get_started_for_beginners.md b/tensorflow/docs_src/get_started/get_started_for_beginners.md index ea1c2fb3f4..446bae4b89 100644 --- a/tensorflow/docs_src/get_started/get_started_for_beginners.md +++ b/tensorflow/docs_src/get_started/get_started_for_beginners.md @@ -36,6 +36,7 @@ the following three: alt="Petal geometry compared for three iris species: Iris setosa, Iris virginica, and Iris versicolor" src="../images/iris_three_species.jpg"> + **From left to right, [*Iris setosa*](https://commons.wikimedia.org/w/index.php?curid=170298) (by [Radomil](https://commons.wikimedia.org/wiki/User:Radomil), 
CC BY-SA 3.0), @@ -188,6 +189,7 @@ provides a programming stack consisting of multiple API layers:
+ **The TensorFlow Programming Environment.**

 

@@ -380,6 +382,7 @@ fully connected neural network consisting of three hidden layers:
+ **A neural network with three hidden layers.**

 

@@ -568,6 +571,7 @@ of 0.5. The following suggests a more effective model:
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.6.0rc0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.6.0rc0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.6.0rc1CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.6.0rc1GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.5.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.5.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.4.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
5.5 2.5 4.0 1.3 1 1
+ **A model that is 80% accurate.**

 

diff --git a/tensorflow/docs_src/get_started/premade_estimators.md b/tensorflow/docs_src/get_started/premade_estimators.md index 4f01f997c3..6bffd2e065 100644 --- a/tensorflow/docs_src/get_started/premade_estimators.md +++ b/tensorflow/docs_src/get_started/premade_estimators.md @@ -98,6 +98,7 @@ classifies Iris flowers into three different species based on the size of their alt="Petal geometry compared for three iris species: Iris setosa, Iris virginica, and Iris versicolor" src="../images/iris_three_species.jpg"> + **From left to right, [*Iris setosa*](https://commons.wikimedia.org/w/index.php?curid=170298) (by [Radomil](https://commons.wikimedia.org/wiki/User:Radomil), CC BY-SA 3.0), diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index 86a111c2ec..87e1a715aa 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -47,7 +47,7 @@ installed on your system: If you have a different version of one of the preceding packages, please change to the specified versions. In particular, the cuDNN version -must match exactly: TensorFlow will not load if it cannot find `cuDNN64_6.dll`. +must match exactly: TensorFlow will not load if it cannot find `cuDNN64_7.dll`. To use a different version of cuDNN, you must build from source. ## Determine how to install TensorFlow -- GitLab From 4b297b5434438175b016da05421e7ddd46c0f8ee Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 15 Feb 2018 12:39:52 -0800 Subject: [PATCH 0069/2939] Register kernels for Assign and AssignVariableOp on GPU for integer types. 
PiperOrigin-RevId: 185882834 --- .../kernels/dense_update_functor_gpu.cu.cc | 1 + tensorflow/core/kernels/dense_update_ops.cc | 2 ++ .../core/kernels/resource_variable_ops.cc | 3 +++ tensorflow/core/kernels/variable_ops.cc | 1 + .../python/kernel_tests/array_ops_test.py | 9 ++++--- .../resource_variable_ops_test.py | 24 ++++++++++++------- 6 files changed, 27 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc index c9c97dc072..9a3b2303a3 100644 --- a/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc +++ b/tensorflow/core/kernels/dense_update_functor_gpu.cu.cc @@ -57,6 +57,7 @@ struct DenseUpdate { template struct functor::DenseUpdate; \ template struct functor::DenseUpdate; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); +TF_CALL_int64(DEFINE_GPU_KERNELS); #undef DEFINE_GPU_KERNELS #define DEFINE_GPU_KERNELS(T) \ diff --git a/tensorflow/core/kernels/dense_update_ops.cc b/tensorflow/core/kernels/dense_update_ops.cc index 6497c8f371..0de97de205 100644 --- a/tensorflow/core/kernels/dense_update_ops.cc +++ b/tensorflow/core/kernels/dense_update_ops.cc @@ -109,6 +109,7 @@ TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS); AssignOpT); TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS); +TF_CALL_int64(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA @@ -142,6 +143,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_KERNELS); Name("AssignSub").Device(DEVICE_GPU).TypeConstraint("T"), \ DenseUpdateOp); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); +TF_CALL_int64(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS #endif // end GOOGLE_CUDA diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc index 5b4aad3cdd..702fb89aac 100644 --- a/tensorflow/core/kernels/resource_variable_ops.cc +++ b/tensorflow/core/kernels/resource_variable_ops.cc @@ -130,6 +130,7 @@ REGISTER_KERNEL_BUILDER( ResourceHandleOp) 
TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS); +TF_CALL_int64(REGISTER_GPU_KERNELS); TF_CALL_variant(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA @@ -398,6 +399,7 @@ TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS); AssignVariableOp); TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS); +TF_CALL_int64(REGISTER_GPU_KERNELS); TF_CALL_variant(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA @@ -456,6 +458,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_KERNELS); AssignUpdateVariableOp); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); +TF_CALL_int64(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc index 10ccc85b7c..7fd5809ca4 100644 --- a/tensorflow/core/kernels/variable_ops.cc +++ b/tensorflow/core/kernels/variable_ops.cc @@ -237,6 +237,7 @@ TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL); IsVariableInitializedOp); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); +TF_CALL_int64(REGISTER_GPU_KERNELS); #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 1e2ea82988..365cf72108 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -498,7 +498,7 @@ class StridedSliceTest(test_util.TensorFlowTestCase): def test_basic_slice(self): for tensor_type in STRIDED_SLICE_TYPES: - with self.test_session(use_gpu=True): + with self.test_session(use_gpu=not tensor_type.is_integer): checker = StridedSliceChecker( self, StridedSliceChecker.REF_TENSOR, tensor_type=tensor_type) _ = checker[:, :, :] @@ -884,7 +884,8 @@ class StridedSliceAssignChecker(object): if self.tensor_type.is_complex: value -= 1j * value - with self.test.test_session(use_gpu=True) as sess: + with self.test.test_session( + use_gpu=not self.tensor_type.is_integer) as sess: if self._use_resource: 
var = resource_variable_ops.ResourceVariable(self.x) else: @@ -974,9 +975,7 @@ class SliceAssignTest(test_util.TensorFlowTestCase): errors.InvalidArgumentError, "l-value dtype int32 does not match r-value dtype int64"): sess.run(v[:].assign(too_large_val)) - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - "l-value dtype int32 does not match r-value dtype int8"): + with self.assertRaises(errors.InvalidArgumentError): sess.run(v[:].assign(too_small_val)) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index dc6e73bd5b..8503f3e031 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -64,6 +64,13 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): 0, dtype=dtypes.int32)).run() + def testGPUInt64(self): + if not context.context().num_gpus(): + return + with context.eager_mode(), context.device("gpu:0"): + v = resource_variable_ops.ResourceVariable(1, dtype=dtypes.int64) + self.assertAllEqual(1, v.numpy()) + def testEagerNameNotIdentity(self): with context.eager_mode(): v0 = resource_variable_ops.ResourceVariable(1.0, name="a") @@ -162,14 +169,15 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes(use_gpu=True) def testScatterAdd(self): - handle = resource_variable_ops.var_handle_op( - dtype=dtypes.int32, shape=[1, 1]) - self.evaluate(resource_variable_ops.assign_variable_op( - handle, constant_op.constant([[1]], dtype=dtypes.int32))) - self.evaluate(resource_variable_ops.resource_scatter_add( - handle, [0], constant_op.constant([[2]], dtype=dtypes.int32))) - read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) - self.assertEqual(self.evaluate(read), [[3]]) + with ops.device("cpu:0"): + handle = resource_variable_ops.var_handle_op( + dtype=dtypes.int32, shape=[1, 1]) + 
self.evaluate(resource_variable_ops.assign_variable_op( + handle, constant_op.constant([[1]], dtype=dtypes.int32))) + self.evaluate(resource_variable_ops.resource_scatter_add( + handle, [0], constant_op.constant([[2]], dtype=dtypes.int32))) + read = resource_variable_ops.read_variable_op(handle, dtype=dtypes.int32) + self.assertEqual(self.evaluate(read), [[3]]) def testScatterUpdateString(self): handle = resource_variable_ops.var_handle_op( -- GitLab From 972fa89023f8f27948321c388fa3f1f7857833c3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 12:50:03 -0800 Subject: [PATCH 0070/2939] Add auc_with_confidence_intervals This method computes the AUC and corresponding confidence intervals using an efficient algorithm. PiperOrigin-RevId: 185884228 --- tensorflow/contrib/metrics/BUILD | 1 + tensorflow/contrib/metrics/__init__.py | 2 + .../contrib/metrics/python/ops/metric_ops.py | 291 ++++++++++++++++++ .../metrics/python/ops/metric_ops_test.py | 199 ++++++++++++ 4 files changed, 493 insertions(+) diff --git a/tensorflow/contrib/metrics/BUILD b/tensorflow/contrib/metrics/BUILD index 9de664c822..e90c525113 100644 --- a/tensorflow/contrib/metrics/BUILD +++ b/tensorflow/contrib/metrics/BUILD @@ -43,6 +43,7 @@ py_library( "//tensorflow/python:util", "//tensorflow/python:variable_scope", "//tensorflow/python:weights_broadcast_ops", + "//tensorflow/python/ops/distributions", ], ) diff --git a/tensorflow/contrib/metrics/__init__.py b/tensorflow/contrib/metrics/__init__.py index d3dce46bfb..de02dc8f45 100644 --- a/tensorflow/contrib/metrics/__init__.py +++ b/tensorflow/contrib/metrics/__init__.py @@ -16,6 +16,7 @@ See the @{$python/contrib.metrics} guide. 
+@@auc_with_confidence_intervals @@streaming_accuracy @@streaming_mean @@streaming_recall @@ -83,6 +84,7 @@ from tensorflow.contrib.metrics.python.ops.confusion_matrix_ops import confusion from tensorflow.contrib.metrics.python.ops.histogram_ops import auc_using_histogram from tensorflow.contrib.metrics.python.ops.metric_ops import aggregate_metric_map from tensorflow.contrib.metrics.python.ops.metric_ops import aggregate_metrics +from tensorflow.contrib.metrics.python.ops.metric_ops import auc_with_confidence_intervals from tensorflow.contrib.metrics.python.ops.metric_ops import cohen_kappa from tensorflow.contrib.metrics.python.ops.metric_ops import count from tensorflow.contrib.metrics.python.ops.metric_ops import precision_recall_at_equal_thresholds diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 55946c128b..fc12bfd2b7 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -38,6 +38,7 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import weights_broadcast_ops +from tensorflow.python.ops.distributions.normal import Normal from tensorflow.python.util.deprecation import deprecated # Epsilon constant used to represent extremely small quantity. @@ -1196,6 +1197,295 @@ def streaming_dynamic_auc(labels, return auc, update_op +def _compute_placement_auc(labels, predictions, weights, alpha, + logit_transformation, is_valid): + """Computes the AUC and asymptotic normally distributed confidence interval. + + The calculations are achieved using the fact that AUC = P(Y_1>Y_0) and the + concept of placement values for each labeled group, as presented by Delong and + Delong (1988). The actual algorithm used is a more computationally efficient + approach presented by Sun and Xu (2014). 
This could be slow for large batches, + but has the advantage of not having its results degrade depending on the + distribution of predictions. + + Args: + labels: A `Tensor` of ground truth labels with the same shape as + `predictions` with values of 0 or 1 and type `int64`. + predictions: A 1-D `Tensor` of predictions whose values are `float64`. + weights: `Tensor` whose rank is either 0, or the same rank as `labels`. + alpha: Confidence interval level desired. + logit_transformation: A boolean value indicating whether the estimate should + be logit transformed prior to calculating the confidence interval. Doing + so enforces the restriction that the AUC should never be outside the + interval [0,1]. + is_valid: A bool tensor describing whether the input is valid. + + Returns: + A 1-D `Tensor` containing the area-under-curve, lower, and upper confidence + interval values. + """ + # Disable the invalid-name checker so that we can capitalize the name. + # pylint: disable=invalid-name + AucData = collections_lib.namedtuple('AucData', ['auc', 'lower', 'upper']) + # pylint: enable=invalid-name + + # If all the labels are the same or if number of observations are too few, + # AUC isn't well-defined + size = array_ops.size(predictions, out_type=dtypes.int32) + + # Count the total number of positive and negative labels in the input. + total_0 = math_ops.reduce_sum( + math_ops.cast(1 - labels, weights.dtype) * weights) + total_1 = math_ops.reduce_sum( + math_ops.cast(labels, weights.dtype) * weights) + + # Sort the predictions ascending, as well as + # (i) the corresponding labels and + # (ii) the corresponding weights. 
+ ordered_predictions, indices = nn.top_k(predictions, k=size, sorted=True) + ordered_predictions = array_ops.reverse( + ordered_predictions, axis=array_ops.zeros(1, dtypes.int32)) + indices = array_ops.reverse(indices, axis=array_ops.zeros(1, dtypes.int32)) + ordered_labels = array_ops.gather(labels, indices) + ordered_weights = array_ops.gather(weights, indices) + + # We now compute values required for computing placement values. + + # We generate a list of indices (segmented_indices) of increasing order. An + # index is assigned for each unique prediction float value. Prediction + # values that are the same share the same index. + _, segmented_indices = array_ops.unique(ordered_predictions) + + # We create 2 tensors of weights. weights_for_true is non-zero for true + # labels. weights_for_false is non-zero for false labels. + float_labels_for_true = math_ops.cast(ordered_labels, dtypes.float32) + float_labels_for_false = 1.0 - float_labels_for_true + weights_for_true = ordered_weights * float_labels_for_true + weights_for_false = ordered_weights * float_labels_for_false + + # For each set of weights with the same segmented indices, we add up the + # weight values. Note that for each label, we deliberately rely on weights + # for the opposite label. + weight_totals_for_true = math_ops.segment_sum(weights_for_false, + segmented_indices) + weight_totals_for_false = math_ops.segment_sum(weights_for_true, + segmented_indices) + + # These cumulative sums of weights importantly exclude the current weight + # sums. + cum_weight_totals_for_true = math_ops.cumsum(weight_totals_for_true, + exclusive=True) + cum_weight_totals_for_false = math_ops.cumsum(weight_totals_for_false, + exclusive=True) + + # Compute placement values using the formula. Values with the same segmented + # indices and labels share the same placement values. 
+ placements_for_true = ( + (cum_weight_totals_for_true + weight_totals_for_true / 2.0) / + (math_ops.reduce_sum(weight_totals_for_true) + _EPSILON)) + placements_for_false = ( + (cum_weight_totals_for_false + weight_totals_for_false / 2.0) / + (math_ops.reduce_sum(weight_totals_for_false) + _EPSILON)) + + # We expand the tensors of placement values (for each label) so that their + # shapes match that of predictions. + placements_for_true = array_ops.gather(placements_for_true, segmented_indices) + placements_for_false = array_ops.gather(placements_for_false, + segmented_indices) + + # Select placement values based on the label for each index. + placement_values = ( + placements_for_true * float_labels_for_true + + placements_for_false * float_labels_for_false) + + # Split placement values by labeled groups. + placement_values_0 = placement_values * math_ops.cast( + 1 - ordered_labels, weights.dtype) + weights_0 = ordered_weights * math_ops.cast( + 1 - ordered_labels, weights.dtype) + placement_values_1 = placement_values * math_ops.cast( + ordered_labels, weights.dtype) + weights_1 = ordered_weights * math_ops.cast( + ordered_labels, weights.dtype) + + # Calculate AUC using placement values + auc_0 = (math_ops.reduce_sum(weights_0 * (1. - placement_values_0)) / + (total_0 + _EPSILON)) + auc_1 = (math_ops.reduce_sum(weights_1 * (placement_values_1)) / + (total_1 + _EPSILON)) + auc = array_ops.where(math_ops.less(total_0, total_1), auc_1, auc_0) + + # Calculate variance and standard error using the placement values. + var_0 = ( + math_ops.reduce_sum( + weights_0 * math_ops.square(1. - placement_values_0 - auc_0)) / + (total_0 - 1. + _EPSILON)) + var_1 = ( + math_ops.reduce_sum( + weights_1 * math_ops.square(placement_values_1 - auc_1)) / + (total_1 - 1. + _EPSILON)) + auc_std_err = math_ops.sqrt( + (var_0 / (total_0 + _EPSILON)) + (var_1 / (total_1 + _EPSILON))) + + # Calculate asymptotic normal confidence intervals + std_norm_dist = Normal(loc=0., scale=1.) 
+ z_value = std_norm_dist.quantile((1.0 - alpha) / 2.0) + if logit_transformation: + estimate = math_ops.log(auc / (1. - auc + _EPSILON)) + std_err = auc_std_err / (auc * (1. - auc + _EPSILON)) + transformed_auc_lower = estimate + (z_value * std_err) + transformed_auc_upper = estimate - (z_value * std_err) + def inverse_logit_transformation(x): + exp_negative = math_ops.exp(math_ops.negative(x)) + return 1. / (1. + exp_negative + _EPSILON) + + auc_lower = inverse_logit_transformation(transformed_auc_lower) + auc_upper = inverse_logit_transformation(transformed_auc_upper) + else: + estimate = auc + std_err = auc_std_err + auc_lower = estimate + (z_value * std_err) + auc_upper = estimate - (z_value * std_err) + + ## If estimate is 1 or 0, no variance is present so CI = 1 + ## n.b. This can be misleading, since number obs can just be too low. + lower = array_ops.where( + math_ops.logical_or( + math_ops.equal(auc, array_ops.ones_like(auc)), + math_ops.equal(auc, array_ops.zeros_like(auc))), + auc, auc_lower) + upper = array_ops.where( + math_ops.logical_or( + math_ops.equal(auc, array_ops.ones_like(auc)), + math_ops.equal(auc, array_ops.zeros_like(auc))), + auc, auc_upper) + + # If all the labels are the same, AUC isn't well-defined (but raising an + # exception seems excessive) so we return 0, otherwise we finish computing. + trivial_value = array_ops.constant(0.0) + + return AucData(*control_flow_ops.cond( + is_valid, lambda: [auc, lower, upper], lambda: [trivial_value]*3)) + + +def auc_with_confidence_intervals(labels, + predictions, + weights=None, + alpha=0.95, + logit_transformation=True, + metrics_collections=(), + updates_collections=(), + name=None): + """Computes the AUC and asymptotic normally distributed confidence interval. + + USAGE NOTE: this approach requires storing all of the predictions and labels + for a single evaluation in memory, so it may not be usable when the evaluation + batch size and/or the number of evaluation steps is very large. 
+ + Computes the area under the ROC curve and its confidence interval using + placement values. This has the advantage of being resilient to the + distribution of predictions by aggregating across batches, accumulating labels + and predictions and performing the final calculation using all of the + concatenated values. + + Args: + labels: A `Tensor` of ground truth labels with the same shape as + `predictions` and with values of 0 or 1 that are castable to `int64`. + predictions: A `Tensor` of predictions whose values are castable to + `float64`. Will be flattened into a 1-D `Tensor`. + weights: Optional `Tensor` whose rank is either 0, or the same rank as + `labels`. + alpha: Confidence interval level desired. + logit_transformation: A boolean value indicating whether the estimate should + be logit transformed prior to calculating the confidence interval. Doing + so enforces the restriction that the AUC should never be outside the + interval [0,1]. + metrics_collections: An optional iterable of collections that `auc` should + be added to. + updates_collections: An optional iterable of collections that `update_op` + should be added to. + name: An optional name for the variable_scope that contains the metric + variables. + + Returns: + auc: An `AucData` named tuple of `Tensor`s: the current area-under-curve and + its lower and upper confidence interval bounds. + update_op: An operation that concatenates the input labels and predictions + to the accumulated values. + + Raises: + ValueError: If `labels`, `predictions`, and `weights` have mismatched shapes + or if `alpha` isn't in the range (0,1).
+ """ + if not (alpha > 0 and alpha < 1): + raise ValueError('alpha must be between 0 and 1; currently %.02f' % alpha) + + if weights is None: + weights = array_ops.ones_like(predictions) + + with variable_scope.variable_scope( + name, + default_name='auc_with_confidence_intervals', + values=[labels, predictions, weights]): + + predictions, labels, weights = metrics_impl._remove_squeezable_dimensions( # pylint: disable=protected-access + predictions=predictions, + labels=labels, + weights=weights) + + total_weight = math_ops.reduce_sum(weights) + + weights = array_ops.reshape(weights, [-1]) + predictions = array_ops.reshape( + math_ops.cast(predictions, dtypes.float64), [-1]) + labels = array_ops.reshape(math_ops.cast(labels, dtypes.int64), [-1]) + + with ops.control_dependencies([ + check_ops.assert_greater_equal( + labels, + array_ops.zeros_like(labels, dtypes.int64), + message='labels must be 0 or 1, at least one is <0'), + check_ops.assert_less_equal( + labels, + array_ops.ones_like(labels, dtypes.int64), + message='labels must be 0 or 1, at least one is >1'), + ]): + preds_accum, update_preds = streaming_concat( + predictions, name='concat_preds') + labels_accum, update_labels = streaming_concat(labels, + name='concat_labels') + weights_accum, update_weights = streaming_concat( + weights, name='concat_weights') + update_op_for_valid_case = control_flow_ops.group( + update_labels, update_preds, update_weights) + + # Only perform updates if this case is valid. 
+ all_labels_positive_or_0 = math_ops.logical_and( + math_ops.equal(math_ops.reduce_min(labels), 0), + math_ops.equal(math_ops.reduce_max(labels), 1)) + sums_of_weights_at_least_1 = math_ops.greater_equal(total_weight, 1.0) + is_valid = math_ops.logical_and(all_labels_positive_or_0, + sums_of_weights_at_least_1) + + update_op = control_flow_ops.cond( + sums_of_weights_at_least_1, + lambda: update_op_for_valid_case, control_flow_ops.no_op) + + auc = _compute_placement_auc( + labels_accum, + preds_accum, + weights_accum, + alpha=alpha, + logit_transformation=logit_transformation, + is_valid=is_valid) + + if updates_collections: + ops.add_to_collections(updates_collections, update_op) + if metrics_collections: + ops.add_to_collections(metrics_collections, auc) + return auc, update_op + + def precision_recall_at_equal_thresholds(labels, predictions, weights=None, @@ -3430,6 +3720,7 @@ def cohen_kappa(labels, __all__ = [ + 'auc_with_confidence_intervals', 'aggregate_metric_map', 'aggregate_metrics', 'cohen_kappa', diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index b4e365d10f..b387f26c01 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -2128,6 +2128,205 @@ class StreamingDynamicAUCTest(test.TestCase): self.assertAlmostEqual(0.90277, auc.eval(), delta=1e-5) +class AucWithConfidenceIntervalsTest(test.TestCase): + + def setUp(self): + np.random.seed(1) + ops.reset_default_graph() + + def _testResultsEqual(self, expected_dict, gotten_result): + """Tests that 2 results (dicts) represent the same data. + + Args: + expected_dict: A dictionary with keys that are the names of properties + of AucData and whose values are the expected floats. + gotten_result: An AucData object.
+ """ + gotten_dict = {k: t.eval() for k, t in gotten_result._asdict().items()} + self.assertItemsEqual( + list(expected_dict.keys()), list(gotten_dict.keys())) + + for key, expected_values in expected_dict.items(): + self.assertAllClose(expected_values, gotten_dict[key]) + + def _testCase(self, predictions, labels, expected_result, weights=None): + """Performs a test given a certain scenario of labels, predictions, weights. + + Args: + predictions: The predictions tensor. Of type float32. + labels: The labels tensor. Of type int64. + expected_result: The expected result (dict) that maps to tensors. + weights: Optional weights tensor. + """ + with self.test_session() as sess: + predictions_tensor = constant_op.constant( + predictions, dtype=dtypes_lib.float32) + labels_tensor = constant_op.constant(labels, dtype=dtypes_lib.int64) + weights_tensor = None + if weights: + weights_tensor = constant_op.constant(weights, dtype=dtypes_lib.float32) + gotten_result, update_op = ( + metric_ops.auc_with_confidence_intervals( + labels=labels_tensor, + predictions=predictions_tensor, + weights=weights_tensor)) + + sess.run(variables.local_variables_initializer()) + sess.run(update_op) + + self._testResultsEqual(expected_result, gotten_result) + + def testAucAllCorrect(self): + self._testCase( + predictions=[0., 0.2, 0.3, 0.3, 0.4, 0.5, 0.6, 0.6, 0.8, 1.0], + labels=[0, 0, 1, 0, 0, 1, 0, 1, 1, 0], + expected_result={ + 'auc': 0.66666667, + 'lower': 0.27826795, + 'upper': 0.91208512, + }) + + def testAucUnorderedInput(self): + self._testCase( + predictions=[1.0, 0.6, 0., 0.3, 0.4, 0.2, 0.5, 0.3, 0.6, 0.8], + labels=[0, 1, 0, 1, 0, 0, 1, 0, 0, 1], + expected_result={ + 'auc': 0.66666667, + 'lower': 0.27826795, + 'upper': 0.91208512, + }) + + def testAucWithWeights(self): + self._testCase( + predictions=[0., 0.2, 0.3, 0.3, 0.4, 0.5, 0.6, 0.6, 0.8, 1.0], + labels=[0, 0, 1, 0, 0, 1, 0, 1, 1, 0], + weights=[0.5, 0.6, 1.2, 1.5, 2.0, 2.0, 1.5, 1.2, 0.6, 0.5], + expected_result={ + 'auc':
0.65151515, + 'lower': 0.28918604, + 'upper': 0.89573906, + }) + + def testAucEqualOne(self): + self._testCase( + predictions=[0, 0.2, 0.3, 0.3, 0.4, 0.5, 0.6, 0.6, 0.8, 1.0], + labels=[0, 0, 0, 0, 0, 1, 1, 1, 1, 1], + expected_result={ + 'auc': 1.0, + 'lower': 1.0, + 'upper': 1.0, + }) + + def testAucEqualZero(self): + self._testCase( + predictions=[0, 0.2, 0.3, 0.3, 0.4, 0.5, 0.6, 0.6, 0.8, 1.0], + labels=[1, 1, 1, 1, 1, 0, 0, 0, 0, 0], + expected_result={ + 'auc': 0.0, + 'lower': 0.0, + 'upper': 0.0, + }) + + def testNonZeroOnePredictions(self): + self._testCase( + predictions=[2.5, -2.5, .5, -.5, 1], + labels=[1, 0, 1, 0, 0], + expected_result={ + 'auc': 0.83333333, + 'lower': 0.15229267, + 'upper': 0.99286517, + }) + + def testAllLabelsOnes(self): + self._testCase( + predictions=[1., 1., 1., 1., 1.], + labels=[1, 1, 1, 1, 1], + expected_result={ + 'auc': 0., + 'lower': 0., + 'upper': 0., + }) + + def testAllLabelsZeros(self): + self._testCase( + predictions=[0., 0., 0., 0., 0.], + labels=[0, 0, 0, 0, 0], + expected_result={ + 'auc': 0., + 'lower': 0., + 'upper': 0., + }) + + def testWeightSumLessThanOneAll(self): + self._testCase( + predictions=[1., 1., 0., 1., 0., 0.], + labels=[1, 1, 1, 0, 0, 0], + weights=[0.1, 0.1, 0.1, 0.1, 0.1, 0.1], + expected_result={ + 'auc': 0., + 'lower': 0., + 'upper': 0., + }) + + def testWithMultipleUpdates(self): + batch_size = 50 + num_batches = 100 + labels = np.array([]) + predictions = np.array([]) + tf_labels = variables.Variable(array_ops.ones(batch_size, dtypes_lib.int32), + collections=[ops.GraphKeys.LOCAL_VARIABLES], + dtype=dtypes_lib.int32) + tf_predictions = variables.Variable( + array_ops.ones(batch_size), + collections=[ops.GraphKeys.LOCAL_VARIABLES], + dtype=dtypes_lib.float32) + auc, update_op = metrics.auc_with_confidence_intervals(tf_labels, + tf_predictions) + with self.test_session() as sess: + sess.run(variables.local_variables_initializer()) + for _ in xrange(num_batches): + new_labels = 
np.random.randint(0, 2, size=batch_size) + noise = np.random.normal(0.0, scale=0.2, size=batch_size) + new_predictions = 0.4 + 0.2 * new_labels + noise + labels = np.concatenate([labels, new_labels]) + predictions = np.concatenate([predictions, new_predictions]) + sess.run(tf_labels.assign(new_labels)) + sess.run(tf_predictions.assign(new_predictions)) + sess.run(update_op) + expected_auc = _np_auc(predictions, labels) + self.assertAllClose(expected_auc, auc.auc.eval()) + + def testExceptionOnFloatLabels(self): + with self.test_session() as sess: + predictions = constant_op.constant([1, 0.5, 0, 1, 0], dtypes_lib.float32) + labels = constant_op.constant([0.7, 0, 1, 0, 1]) + _, update_op = metrics.auc_with_confidence_intervals(labels, predictions) + sess.run(variables.local_variables_initializer()) + self.assertRaises(TypeError, sess.run(update_op)) + + def testExceptionOnGreaterThanOneLabel(self): + with self.test_session() as sess: + predictions = constant_op.constant([1, 0.5, 0, 1, 0], dtypes_lib.float32) + labels = constant_op.constant([2, 1, 0, 1, 0]) + _, update_op = metrics.auc_with_confidence_intervals(labels, predictions) + sess.run(variables.local_variables_initializer()) + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + '.*labels must be 0 or 1, at least one is >1.*'): + sess.run(update_op) + + def testExceptionOnNegativeLabel(self): + with self.test_session() as sess: + predictions = constant_op.constant([1, 0.5, 0, 1, 0], dtypes_lib.float32) + labels = constant_op.constant([1, 0, -1, 1, 0]) + _, update_op = metrics.auc_with_confidence_intervals(labels, predictions) + sess.run(variables.local_variables_initializer()) + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + '.*labels must be 0 or 1, at least one is <0.*'): + sess.run(update_op) + + class StreamingPrecisionRecallAtEqualThresholdsTest(test.TestCase): def setUp(self): -- GitLab From d81104a09de68a06e4b607cf8761f1e3affea829 Mon Sep 17 00:00:00 2001 From: Alina 
Sbirlea Date: Thu, 15 Feb 2018 13:35:19 -0800 Subject: [PATCH 0071/2939] Optimize dot(DynamicSlice(ConstA), ConstantB) by memoizing dot(ConstA, ConstB) Make transformation when ConstA and ConstB are 2D, and DynamicSlice is slicing a full row, column respectively. Handle: dot(DynamicSlice(Index, ConstA), ConstB) => DynamicSlice(Index, dot*(ConstA, ConstB)); and dot(ConstA, DynamicSlice(Index, ConstB)) => DynamicSlice(Index, dot*(ConstA, ConstB)); PiperOrigin-RevId: 185891869 --- .../xla/service/algebraic_simplifier.cc | 141 ++++++++++ .../xla/service/algebraic_simplifier_test.cc | 203 +++++++++++++++ .../compiler/xla/tests/dot_operation_test.cc | 246 ++++++++++++++++++ 3 files changed, 590 insertions(+) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index fb857559f9..6f6c2391f3 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -284,6 +284,8 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { const Shape& dot_shape, HloInstruction* lhs, int64 lhs_contracting_dim, HloInstruction* rhs, int64 rhs_contracting_dim, bool swapped); + StatusOr OptimizeDotOfGather(HloInstruction* dot); + // Current HloComputation instance the AlgebraicSimplifierVisitor is // traversing. 
HloComputation* computation_; @@ -917,6 +919,134 @@ StatusOr AlgebraicSimplifierVisitor::OptimizeDotOfConcatHelper( return add_result; } +StatusOr AlgebraicSimplifierVisitor::OptimizeDotOfGather( + HloInstruction* dot) { + const DotDimensionNumbers& dnums = dot->dot_dimension_numbers(); + if (dnums.lhs_contracting_dimensions_size() != 1 || + dnums.rhs_contracting_dimensions_size() != 1 || + dnums.lhs_batch_dimensions_size() != 0 || + dnums.rhs_batch_dimensions_size() != 0 || + dot->shape().dimensions_size() != 2) { // dot output 2D + VLOG(10) << "DotOfGather: Can only optimize 2D, non-batch dot operations."; + return nullptr; + } + + // Optimize either dot(DS(ctA), ctB) or dot(ctA, DS(ctB)). + // Currently a Gather is a DynamicSlice. + auto is_dynamic_slice_constant_combination = + [](HloInstruction* a, HloInstruction* b, int a_contracting_dimension) { + // First operand is a DynamicSlice(Constant). + if (a->opcode() != HloOpcode::kDynamicSlice) { + return false; + } + auto* dynamic_slice_op = a->operand(0); + if (dynamic_slice_op->opcode() != HloOpcode::kConstant) { + return false; + } + // Second operand is a Constant. + if (b->opcode() != HloOpcode::kConstant) { + return false; + } + // The DynamicSlice output is a vector. + const Shape& dynamic_slice_shape = a->shape(); + if (dynamic_slice_shape.dimensions(1 - a_contracting_dimension) != 1) { + return false; + } + // Constant size is the same before and after slice in the contracting + // dimension, otherwise we either must precompute for all possible slice + // indices or dot is invalid.
+ const Shape& dynamic_slice_op_shape = dynamic_slice_op->shape(); + if (dynamic_slice_op_shape.dimensions(a_contracting_dimension) != + dynamic_slice_shape.dimensions(a_contracting_dimension)) { + return false; + } + return true; + }; + + HloInstruction* lhs = dot->mutable_operand(0); + HloInstruction* rhs = dot->mutable_operand(1); + int lhs_contracting_dimension = dnums.lhs_contracting_dimensions(0); + int rhs_contracting_dimension = dnums.rhs_contracting_dimensions(0); + + if (!is_dynamic_slice_constant_combination( + lhs, rhs, /*a_contracting_dimension=*/lhs_contracting_dimension) && + !is_dynamic_slice_constant_combination( + rhs, lhs, /*a_contracting_dimension=*/rhs_contracting_dimension)) { + VLOG(10) << "DotOfGather: Can only optimize dot(DS(ctA), ctB)) or " + "dot(ctB, DS(ctA)), where the two constants have equal " + "contracting dimensions."; + return nullptr; + } + + // LHS is DynamicSlice: + // input: dot(DS(ctA), ctB) + // where DS(ctA) = DS({M x K}, {start, 0}, {1, K}) and ctB = {K x N}. + // => input dimensions: dot({1 x K}, {K x N}) => {1 x N}. + // output: DS(dot(ctA, ctB)) + // => output dimensions: DS ({M x N}, {start, 0}, {1, N}) => {1 x N}. + + // RHS is DynamicSlice: + // input: dot(ctA, DS(ctB)) + // where ctA = {M x K} and DS(ctB) = DS({K x N}, {0, start}, {K, 1}). + // => input dimensions: dot({M x K}, {K x 1}) => {M x 1}. + // output: DS(dot(ctA, ctB)) + // => output dimensions: DS ({M x N}, {0, start}, {M, 1}) => {M x 1}. + + bool lhs_is_dynamic_slice = lhs->opcode() == HloOpcode::kDynamicSlice; + + // ctA: + HloInstruction* left_operand = + lhs_is_dynamic_slice ? lhs->mutable_operand(0) : lhs; + // ctB: + HloInstruction* right_operand = + lhs_is_dynamic_slice ? rhs : rhs->mutable_operand(0); + // Build ctA x ctB with the element type of the original dot (not just F32).
+ const int m = left_operand->shape().dimensions(1 - lhs_contracting_dimension); + const int n = + right_operand->shape().dimensions(1 - rhs_contracting_dimension); + auto memo_shape = ShapeUtil::MakeShape(dot->shape().element_type(), {m, n}); + auto* memoized_inst = computation_->AddInstruction(HloInstruction::CreateDot( + memo_shape, left_operand, right_operand, dnums)); + // Get pair {start, 0} or {0, start}. + HloInstruction* original_start_indices = + lhs_is_dynamic_slice ? lhs->mutable_operand(1) : rhs->mutable_operand(1); + // Position of start: + int index_of_non_zero_start = lhs_is_dynamic_slice + ? 1 - lhs_contracting_dimension + : 1 - rhs_contracting_dimension; + // Position of zero: + int index_of_zero_start = 1 - index_of_non_zero_start; + + // Slice out start and 0 components and reorder if necessary. + auto indices_type = original_start_indices->shape().element_type(); + Shape s_shape = ShapeUtil::MakeShape(indices_type, {1}); + Shape d_shape = ShapeUtil::MakeShape(indices_type, {2}); + HloInstruction* non_zero_start = + computation_->AddInstruction(HloInstruction::CreateSlice( + s_shape, original_start_indices, {index_of_non_zero_start}, + {index_of_non_zero_start + 1}, {1})); + HloInstruction* zero_start = + computation_->AddInstruction(HloInstruction::CreateSlice( + s_shape, original_start_indices, {index_of_zero_start}, + {index_of_zero_start + 1}, {1})); + HloInstruction* new_start_indices = + lhs_is_dynamic_slice + ? computation_->AddInstruction(HloInstruction::CreateConcatenate( + d_shape, {non_zero_start, zero_start}, 0)) + : computation_->AddInstruction(HloInstruction::CreateConcatenate( + d_shape, {zero_start, non_zero_start}, 0)); + + // Build DynamicSlice(ctA x ctB). + const int new_slice_m = lhs_is_dynamic_slice ? 1 : m; + const int new_slice_n = lhs_is_dynamic_slice ?
n : 1; + auto* memoized_lookup = + computation_->AddInstruction(HloInstruction::CreateDynamicSlice( + dot->shape(), memoized_inst, new_start_indices, + {new_slice_m, new_slice_n})); + + return memoized_lookup; +} + Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { auto lhs = dot->mutable_operand(0); auto rhs = dot->mutable_operand(1); @@ -946,6 +1076,17 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { return ReplaceInstruction(dot, dot_of_concat_optimized); } + // Simplify dot(ConstA, Gather(Index, ConstB)) to: + // Gather(Index, dot*(ConstA, ConstB)), where dot* is an appropriately + // batched version of dot. + TF_ASSIGN_OR_RETURN(HloInstruction * dot_of_gather_optimized, + OptimizeDotOfGather(dot)); + if (dot_of_gather_optimized) { + VLOG(10) << "Replaced dot(constA, gather(i, constB)) with " + "gather(i, dot*(constA, constB))"; + return ReplaceInstruction(dot, dot_of_gather_optimized); + } + if (enable_dot_strength_reduction_ && !is_layout_sensitive_) { TF_ASSIGN_OR_RETURN(bool did_strength_reduction, HandleDotStrengthReduction(dot)); diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 0f08eb3a32..fc78420147 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2772,5 +2772,208 @@ DotOfConcatTestSpec kDotOfConcatTestSpecs[] = { INSTANTIATE_TEST_CASE_P(DotOfConcatSimplificationTestInstantiation, DotOfConcatSimplificationTest, ::testing::ValuesIn(kDotOfConcatTestSpecs)); + +struct DotOfGatherTestSpec { + int64 m; + int64 k; + int64 n; + int s; // start index for dynamic slice on the non-contracting dimension + int64 lcd; // left contracting dimension + int64 rcd; // right contracting dimension + bool neg; // is negative testcase +}; + +class DotOfGatherSimplificationTest + : public HloVerifiedTestBase, + public ::testing::WithParamInterface {};
+ +// input: dot(DS(ctA), ctB)) +// where DS(ctA) = DS({M x K}, {s, 0}, {1, K}) and ctB = {K x N}. +// => input dimensions: dot({1 x K}, {K x N}) => {1 x N}. +// output: DS(dot(ctA, ctB)) +// => output dimensions: DS ({M x N}, {s, 0}, {1, N}) => {1 x N}. +TEST_P(DotOfGatherSimplificationTest, ConstantRHS) { + HloComputation::Builder builder(TestName()); + + DotOfGatherTestSpec spec = GetParam(); + + ASSERT_LE(spec.s, spec.m); + + // For negative tests, increase k of the dynamic slice argument to prevent the + // optimization (constants ctA, ctB must have equal contracting dimensions). + int64 k_increase = spec.neg ? 5 : 0; + int64 lhs_rows = (spec.lcd == 0) ? (spec.k + k_increase) : spec.m; + int64 lhs_cols = (spec.lcd == 0) ? spec.m : (spec.k + k_increase); + Shape lhs_shape = ShapeUtil::MakeShape(F32, {lhs_rows, lhs_cols}); + auto* lhs = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( + /*from=*/10.0, /*to=*/10000.0, /*rows=*/lhs_rows, + /*cols=*/lhs_cols))); + + int32 start_row = (spec.lcd == 0) ? 0 : spec.s; + int32 start_col = (spec.lcd == 0) ? spec.s : 0; + const auto start_indices = + builder.AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR1({start_row, start_col}))); + int64 slice_row_size = (spec.lcd == 0) ? spec.k : 1; + int64 slice_col_size = (spec.lcd == 0) ? 1 : spec.k; + Shape ds_shape = ShapeUtil::MakeShape(F32, {slice_row_size, slice_col_size}); + auto* ds = builder.AddInstruction(HloInstruction::CreateDynamicSlice( + ds_shape, lhs, start_indices, {slice_row_size, slice_col_size})); + + int64 rhs_rows = (spec.rcd == 0) ? spec.k : spec.n; + int64 rhs_cols = (spec.rcd == 0) ? 
spec.n : spec.k; + Shape rhs_shape = ShapeUtil::MakeShape(F32, {rhs_rows, rhs_cols}); + auto* rhs = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( + /*from=*/10.0, /*to=*/10000.0, /*rows=*/rhs_rows, + /*cols=*/rhs_cols))); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(spec.lcd); + dot_dnums.add_rhs_contracting_dimensions(spec.rcd); + + int64 dot_row_size = 1; + int64 dot_col_size = spec.n; + Shape dot_shape = ShapeUtil::MakeShape(F32, {dot_row_size, dot_col_size}); + builder.AddInstruction( + HloInstruction::CreateDot(dot_shape, ds, rhs, dot_dnums)); + + auto computation = module().AddEntryComputation(builder.Build()); + AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, + non_bitcasting_callback()); + TF_ASSERT_OK_AND_ASSIGN(bool run_successful, simplifier.Run(&module())); + ASSERT_TRUE(run_successful); + EXPECT_TRUE( + ShapeUtil::Equal(computation->root_instruction()->shape(), dot_shape)); + + if (spec.neg) { + EXPECT_NE(computation->root_instruction()->opcode(), + HloOpcode::kDynamicSlice); + } else { + EXPECT_THAT(computation->root_instruction(), + op::DynamicSlice(op::Dot(op::Constant(), op::Constant()), + op::Concatenate())); + } +} + +// input: dot(ctA, DS(ctB)) +// where ctA = {M x K} and DS(ctB) = DS({K x N}, {0, s}, {K, 1}). +// => input dimensions: dot({M x K}, {K x 1}) => {M x 1}. +// output: DS(dot(ctA, ctB)) +// => output dimensions: DS ({M x N}, {0, s}, {M, 1}) => {M x 1}. +TEST_P(DotOfGatherSimplificationTest, ConstantLHS) { + HloComputation::Builder builder(TestName()); + + DotOfGatherTestSpec spec = GetParam(); + + ASSERT_LE(spec.s, spec.n); + + int64 lhs_rows = (spec.lcd == 0) ? spec.k : spec.m; + int64 lhs_cols = (spec.lcd == 0) ? 
spec.m : spec.k; + Shape lhs_shape = ShapeUtil::MakeShape(F32, {lhs_rows, lhs_cols}); + auto* lhs = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( + /*from=*/10.0, /*to=*/10000.0, /*rows=*/lhs_rows, + /*cols=*/lhs_cols))); + + // For negative tests increase k of the dynamic slice argument to prevent the + // optimization + int64 k_increase = spec.neg ? 5 : 0; + int64 rhs_rows = (spec.rcd == 0) ? (spec.k + k_increase) : spec.n; + int64 rhs_cols = (spec.rcd == 0) ? spec.n : (spec.k + k_increase); + Shape rhs_shape = ShapeUtil::MakeShape(F32, {rhs_rows, rhs_cols}); + auto* rhs = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( + /*from=*/10.0, /*to=*/10000.0, /*rows=*/rhs_rows, + /*cols=*/rhs_cols))); + + int32 start_row = (spec.rcd == 0) ? 0 : spec.s; + int32 start_col = (spec.rcd == 0) ? spec.s : 0; + const auto start_indices = + builder.AddInstruction(HloInstruction::CreateConstant( + Literal::CreateR1({start_row, start_col}))); + int64 slice_row_size = (spec.rcd == 0) ? spec.k : 1; + int64 slice_col_size = (spec.rcd == 0) ? 
1 : spec.k; + Shape ds_shape = ShapeUtil::MakeShape(F32, {slice_row_size, slice_col_size}); + auto* ds = builder.AddInstruction(HloInstruction::CreateDynamicSlice( + ds_shape, rhs, start_indices, {slice_row_size, slice_col_size})); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(spec.lcd); + dot_dnums.add_rhs_contracting_dimensions(spec.rcd); + + int64 dot_row_size = spec.m; + int64 dot_col_size = 1; + Shape dot_shape = ShapeUtil::MakeShape(F32, {dot_row_size, dot_col_size}); + builder.AddInstruction( + HloInstruction::CreateDot(dot_shape, lhs, ds, dot_dnums)); + + auto computation = module().AddEntryComputation(builder.Build()); + AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, + non_bitcasting_callback()); + TF_ASSERT_OK_AND_ASSIGN(bool run_successful, simplifier.Run(&module())); + ASSERT_TRUE(run_successful); + EXPECT_TRUE( + ShapeUtil::Equal(computation->root_instruction()->shape(), dot_shape)); + + if (spec.neg) { + EXPECT_NE(computation->root_instruction()->opcode(), + HloOpcode::kDynamicSlice); + } else { + EXPECT_THAT(computation->root_instruction(), + op::DynamicSlice(op::Dot(op::Constant(), op::Constant()), + op::Concatenate())); + } +} + +std::vector DotOfGatherPositiveNegativeTests() { + std::vector positives = { + // "Classical dot", i.e. matrix multiply: + {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/1, /*rcd=*/0, + /*neg=*/false}, + {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/1, /*rcd=*/0, + /*neg=*/false}, + {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/1, /*rcd=*/0, + /*neg=*/false}, + // Note: testing for m=1 and n=1 is unnecessary, as this optimizes to + // dot(ct, ct) before DotOfGather optimization kicks in. 
+ // Contract on rows: + {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/0, /*rcd=*/0, + /*neg=*/false}, + {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/0, /*rcd=*/0, + /*neg=*/false}, + {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/0, /*rcd=*/0, + /*neg=*/false}, + // Reverse matrix multiply: + {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/0, /*rcd=*/1, + /*neg=*/false}, + {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/0, /*rcd=*/1, + /*neg=*/false}, + {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/0, /*rcd=*/1, + /*neg=*/false}, + // Contract on columns: + {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/1, /*rcd=*/1, + /*neg=*/false}, + {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/1, /*rcd=*/1, + /*neg=*/false}, + {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/1, /*rcd=*/1, + /*neg=*/false}, + }; + std::vector all; + for (int i = 0; i < positives.size(); i++) { + DotOfGatherTestSpec positive_test = positives[i]; + all.push_back(positive_test); + DotOfGatherTestSpec negative_test = positive_test; + negative_test.neg = true; + all.push_back(negative_test); + } + return all; +} + +INSTANTIATE_TEST_CASE_P( + DotOfGatherSimplificationTestInstantiation, DotOfGatherSimplificationTest, + ::testing::ValuesIn(DotOfGatherPositiveNegativeTests())); + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 6b0c04c2c0..63354d4b30 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -703,5 +703,251 @@ TEST_F(DotOperationTest, DotOfConcatOptimizationWithConstRHS) { &builder, expected, {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, error_spec_); } + +TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstRHSClassicMM) { + std::unique_ptr> constant_lhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new 
Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + // Dot result to slice from: {{114, 105, 96}, {96, 105, 114}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({1, 0}); + auto dynamic_slice = + builder.DynamicSlice(lhs_constant, start_constant, {1, 6}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); + + Array2D expected({{96.0, 105.0, 114.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstLHSClassicMM) { + std::unique_ptr> constant_lhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + // Dot result to slice from: {{114, 105, 96}, {96, 105, 114}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({0, 1}); + auto dynamic_slice = + builder.DynamicSlice(rhs_constant, start_constant, {6, 1}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); + auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); + + Array2D expected({{105.0}, {105.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting 
dimensions. +TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( + DotOfGatherOptimizationWithConstRHSReverseMM)))) { + std::unique_ptr> constant_lhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + // Dot result to slice from: {{114, 96}, {105, 105}, {96, 114}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({0, 1}); + auto dynamic_slice = + builder.DynamicSlice(lhs_constant, start_constant, {6, 1}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(1); + auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); + + Array2D expected({{105.0, 105.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
+TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( + DotOfGatherOptimizationWithConstLHSReverseMM)))) { + std::unique_ptr> constant_lhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + // Dot result to slice from: {{114, 96}, {105, 105}, {96, 114}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({1, 0}); + auto dynamic_slice = + builder.DynamicSlice(rhs_constant, start_constant, {1, 6}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(1); + auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); + + Array2D expected({{96.0}, {105.0}, {114.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
+TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU( + DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSRows)))) { + std::unique_ptr> constant_lhs_array( + new Array2D({{1.0, 2.0}, + {3.0, 4.0}, + {5.0, 6.0}, + {6.0, 5.0}, + {4.0, 3.0}, + {2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + // Dot result to slice from: {{132, 129, 126}, {126, 129, 132}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({0, 1}); + auto dynamic_slice = + builder.DynamicSlice(lhs_constant, start_constant, {6, 1}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(0); + auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); + + Array2D expected({{126.0, 129.0, 132.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
+TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU( + DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSRows)))) { + std::unique_ptr> constant_lhs_array( + new Array2D({{1.0, 2.0}, + {3.0, 4.0}, + {5.0, 6.0}, + {6.0, 5.0}, + {4.0, 3.0}, + {2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0}, + {4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0}, + {9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0}, + {3.0, 2.0, 1.0}})); + // Dot result to slice from: {{132, 129, 126}, {126, 129, 132}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({0, 1}); + auto dynamic_slice = + builder.DynamicSlice(rhs_constant, start_constant, {6, 1}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(0); + dot_dnums.add_rhs_contracting_dimensions(0); + auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); + + Array2D expected({{129.0}, {129.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
+TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU( + DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSCols)))) { + std::unique_ptr> constant_lhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0, 9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + // Dot result to slice from: {{91, 168, 56}, {56, 168, 91}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({1, 0}); + auto dynamic_slice = + builder.DynamicSlice(lhs_constant, start_constant, {1, 6}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(1); + auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); + + Array2D expected({{56.0, 168.0, 91.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} + +// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
+TEST_F(DotOperationTest, + DISABLED_ON_CPU(DISABLED_ON_GPU( + DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSCols)))) { + std::unique_ptr> constant_lhs_array(new Array2D( + {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, + {7.0, 8.0, 9.0, 9.0, 8.0, 7.0}, + {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + // Dot result to slice from: {{91, 168, 56}, {56, 168, 91}} + + ComputationBuilder builder(client_, TestName()); + auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); + auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); + auto start_constant = builder.ConstantR1({1, 0}); + auto dynamic_slice = + builder.DynamicSlice(rhs_constant, start_constant, {1, 6}); + + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(1); + auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); + + Array2D expected({{168.0}, {168.0}}); + ComputeAndCompareR2(&builder, expected, {}, error_spec_); +} } // namespace } // namespace xla -- GitLab From 82d67d0af2ed13bdf003e69486f3f477961ef407 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Thu, 15 Feb 2018 13:40:10 -0800 Subject: [PATCH 0072/2939] Wrap XlaOpRegistry::DeviceKernels call to call in python. 
PiperOrigin-RevId: 185892888 --- tensorflow/compiler/tf2xla/python/BUILD | 21 +++++++++++++++++++ .../tf2xla/python/xla_op_registry.clif | 7 +++++++ tensorflow/compiler/tf2xla/xla_op_registry.cc | 2 ++ tensorflow/core/BUILD | 7 +++++++ 4 files changed, 37 insertions(+) create mode 100644 tensorflow/compiler/tf2xla/python/BUILD create mode 100644 tensorflow/compiler/tf2xla/python/xla_op_registry.clif diff --git a/tensorflow/compiler/tf2xla/python/BUILD b/tensorflow/compiler/tf2xla/python/BUILD new file mode 100644 index 0000000000..49bde78039 --- /dev/null +++ b/tensorflow/compiler/tf2xla/python/BUILD @@ -0,0 +1,21 @@ +licenses(["notice"]) # Apache 2.0 + +package( + default_visibility = ["//tensorflow:internal"], +) + +load( + "//tensorflow/core:platform/default/build_config.bzl", + "tf_py_clif_cc", +) + +tf_py_clif_cc( + name = "xla_op_registry", + srcs = ["xla_op_registry.clif"], + pyclif_deps = [ + "//tensorflow/core:framework/kernel_def_pyclif", + ], + deps = [ + "//tensorflow/compiler/tf2xla:xla_compiler", + ], +) diff --git a/tensorflow/compiler/tf2xla/python/xla_op_registry.clif b/tensorflow/compiler/tf2xla/python/xla_op_registry.clif new file mode 100644 index 0000000000..e1ee6cc656 --- /dev/null +++ b/tensorflow/compiler/tf2xla/python/xla_op_registry.clif @@ -0,0 +1,7 @@ +from "third_party/tensorflow/core/framework/kernel_def_pyclif.h" import * # KernelDef + +from "third_party/tensorflow/compiler/tf2xla/xla_op_registry.h": + namespace `tensorflow`: + def `XlaOpRegistry::DeviceKernels` as + device_kernels(device: str, include_compilation_only_kernels: bool) -> + list diff --git a/tensorflow/compiler/tf2xla/xla_op_registry.cc b/tensorflow/compiler/tf2xla/xla_op_registry.cc index 0dde6a986c..bbe808595d 100644 --- a/tensorflow/compiler/tf2xla/xla_op_registry.cc +++ b/tensorflow/compiler/tf2xla/xla_op_registry.cc @@ -255,6 +255,8 @@ void XlaOpRegistry::RegisterCompilationKernels() { std::vector XlaOpRegistry::DeviceKernels( const string& compilation_device_name, 
bool include_compilation_only_kernels) { + // Ensure compilation kernels registered. + RegisterCompilationKernels(); std::vector kernels; XlaOpRegistry& registry = Instance(); mutex_lock lock(registry.mutex_); diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 8eb5c11969..30ac270109 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1335,6 +1335,13 @@ tf_pyclif_proto_library( visibility = ["//visibility:public"], ) +tf_pyclif_proto_library( + name = "framework/kernel_def_pyclif", + proto_lib = ":protos_all_cc", + proto_srcfile = "framework/kernel_def.proto", + visibility = ["//visibility:public"], +) + tf_pyclif_proto_library( name = "framework/node_def_pyclif", proto_lib = ":protos_all_cc", -- GitLab From 8745e3426713068e7061b3aae368ebb4db8dc2cc Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 15 Feb 2018 13:43:47 -0800 Subject: [PATCH 0073/2939] Object-based saving: Switch to "everything is Checkpointable" The only sane way to use/test this is to have Variables be Checkpointable, so this CL includes a move of the base class to core. No public methods are exposed, and I've attempted to not throw any errors on __setattr__. Allows dynamic dependencies (track after restore) and restoring variables on assignment to a Checkpointable object, and includes the protocol buffer modifications necessary for saving information with each object. There are still some prominent TODOs: - Stop modifying the graph after the first save/restore (likely cache ops in Checkpointable objects) - Add some overridable methods for saving Python strings when restore() is called, fed when graph building rather than embedded as constants in the graph - Work on the initialization story for graph building. Currently the unit tests rely on collections for this. - Support for more objects, move the prototype modifications in checkpointable_test to core. 
The diff is larger than I was hoping (mostly deletions and unit tests); that could be reduced a bit (or at least "lines added" converted to "lines deleted") by diffbasing on cl/180950921, which was my first attempt at dynamic dependencies. This CL is more of a re-write than a modification, so sending that one out seems a bit silly. The unit tests are still good, though. PiperOrigin-RevId: 185893387 --- .../proto/checkpointable_object_graph.proto | 44 +- tensorflow/contrib/eager/python/BUILD | 13 +- .../contrib/eager/python/checkpointable.py | 773 ---------------- .../eager/python/checkpointable_test.py | 497 ---------- .../eager/python/checkpointable_utils.py | 413 +++++++++ .../eager/python/checkpointable_utils_test.py | 857 ++++++++++++++++++ tensorflow/python/BUILD | 25 + .../python/ops/resource_variable_ops.py | 6 + tensorflow/python/ops/variables.py | 22 +- tensorflow/python/training/checkpointable.py | 584 ++++++++++++ .../python/training/checkpointable_test.py | 39 + tensorflow/python/training/optimizer.py | 47 +- .../api/golden/tensorflow.-variable.pbtxt | 1 + ...tensorflow.train.-adadelta-optimizer.pbtxt | 1 + ...sorflow.train.-adagrad-d-a-optimizer.pbtxt | 1 + .../tensorflow.train.-adagrad-optimizer.pbtxt | 1 + .../tensorflow.train.-adam-optimizer.pbtxt | 1 + .../tensorflow.train.-ftrl-optimizer.pbtxt | 1 + ...ow.train.-gradient-descent-optimizer.pbtxt | 1 + ...tensorflow.train.-momentum-optimizer.pbtxt | 1 + .../golden/tensorflow.train.-optimizer.pbtxt | 1 + ...ow.train.-proximal-adagrad-optimizer.pbtxt | 1 + ...-proximal-gradient-descent-optimizer.pbtxt | 1 + ...nsorflow.train.-r-m-s-prop-optimizer.pbtxt | 1 + ...rflow.train.-sync-replicas-optimizer.pbtxt | 1 + tensorflow/tools/pip_package/BUILD | 2 +- 26 files changed, 2033 insertions(+), 1302 deletions(-) delete mode 100644 tensorflow/contrib/eager/python/checkpointable.py delete mode 100644 tensorflow/contrib/eager/python/checkpointable_test.py create mode 100644 
tensorflow/contrib/eager/python/checkpointable_utils.py create mode 100644 tensorflow/contrib/eager/python/checkpointable_utils_test.py create mode 100644 tensorflow/python/training/checkpointable.py create mode 100644 tensorflow/python/training/checkpointable_test.py diff --git a/tensorflow/contrib/eager/proto/checkpointable_object_graph.proto b/tensorflow/contrib/eager/proto/checkpointable_object_graph.proto index 4f71aec96a..024765acb2 100644 --- a/tensorflow/contrib/eager/proto/checkpointable_object_graph.proto +++ b/tensorflow/contrib/eager/proto/checkpointable_object_graph.proto @@ -4,9 +4,9 @@ option cc_enable_arenas = true; package tensorflow.contrib.eager; -// Prototype for an addition to BundleHeaderProto which saves extra information -// about the objects which own variables, allowing for more robust checkpoint -// loading into modified programs. +// Prototype format which saves extra information about the objects which own +// variables, allowing for more robust checkpoint loading into modified +// programs. Currently stored in its own entry in a TensorBundle. message CheckpointableObjectGraph { message Object { @@ -18,37 +18,35 @@ message CheckpointableObjectGraph { string local_name = 2; } - message VariableReference { - // A name for the variable which is unique within the object which owns - // it. Does not include a name_scope or variable_scope prefix. - string local_name = 1; - // The full name of the variable. Used to allow name-based loading of - // checkpoints which were saved using an object-based API. + message SerializedTensor { + // A name for the Tensor. Simple variables have only one + // `SerializedTensor` named "VARIABLE_VALUE" by convention. This value may + // be restored on object creation as an optimization. + string name = 1; + // The full name of the variable/tensor, if applicable. Used to allow + // name-based loading of checkpoints which were saved using an + // object-based API. 
Should match the checkpoint key which would have been + // assigned by tf.train.Saver. string full_name = 2; - // The generated name of the variable in the checkpoint. + // The generated name of the Tensor in the checkpoint. string checkpoint_key = 3; } message SlotVariableReference { - // An index into `CheckpointableObjectGraph.nodes`, indicating the object - // which created the variable that this variable is slotting for. + // An index into `CheckpointableObjectGraph.nodes`, indicating the + // variable object this slot was created for. int32 original_variable_node_id = 1; - // The local name of the variable being slotted for within the object that - // owns it. - string original_variable_local_name = 2; // The name of the slot (e.g. "m"/"v"). - string slot_name = 3; - // The full name of the slot variable. Used to allow name-based loading of - // checkpoints which were saved using an object-based API. - string full_name = 4; - // The generated name of the variable in the checkpoint. - string checkpoint_key = 5; + string slot_name = 2; + // An index into `CheckpointableObjectGraph.nodes`, indicating the + // `Object` with the value of the slot variable. + int32 slot_variable_node_id = 3; } // Objects which this object depends on. repeated ObjectReference children = 1; - // Non-slot variables owned by this object. - repeated VariableReference variables = 2; + // Serialized data specific to this object. + repeated SerializedTensor attributes = 2; // Slot variables owned by this object. 
repeated SlotVariableReference slot_variables = 3; } diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index cfb38a1d26..ad40e55cb4 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -220,18 +220,19 @@ py_test( ) py_library( - name = "checkpointable", - srcs = ["checkpointable.py"], + name = "checkpointable_utils", + srcs = ["checkpointable_utils.py"], srcs_version = "PY2AND3", visibility = ["//tensorflow:internal"], deps = [ "//tensorflow/contrib/eager/proto:checkpointable_object_graph_proto_py", + "//tensorflow/python:constant_op", + "//tensorflow/python:control_flow_ops", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", "//tensorflow/python:init_ops", "//tensorflow/python:io_ops", "//tensorflow/python:resource_variable_ops", - "//tensorflow/python:state_ops", "//tensorflow/python:tensor_shape", "//tensorflow/python:training", "//tensorflow/python:variable_scope", @@ -240,11 +241,11 @@ py_library( ) py_test( - name = "checkpointable_test", - srcs = ["checkpointable_test.py"], + name = "checkpointable_utils_test", + srcs = ["checkpointable_utils_test.py"], srcs_version = "PY2AND3", deps = [ - ":checkpointable", + ":checkpointable_utils", ":network", "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", diff --git a/tensorflow/contrib/eager/python/checkpointable.py b/tensorflow/contrib/eager/python/checkpointable.py deleted file mode 100644 index 896b38a734..0000000000 --- a/tensorflow/contrib/eager/python/checkpointable.py +++ /dev/null @@ -1,773 +0,0 @@ -"""An object-local variable management scheme.""" -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import re -import weakref - -from tensorflow.contrib.eager.proto import checkpointable_object_graph_pb2 -from tensorflow.python.eager import context -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import io_ops -from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.ops import state_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.training import optimizer as optimizer_lib -from tensorflow.python.training import saver as saver_lib -from tensorflow.python.training import slot_creator -from tensorflow.python.training import training - -_CheckpointableReference = collections.namedtuple( - "_CheckpointableReference", - [ - # The local name if explicitly specified, else None. - "name", - # The Checkpointable object being referenced. - "ref" - ]) - -# Validation regular expression for the local names of Checkpointable -# objects. In particular, disallows "/" in names, and reserves dash-prefixed -# names (which are not valid Python identifiers, so we're not restricting the -# __setattr__ syntax that way). 
-_VALID_LOCAL_NAME = re.compile(r"^[A-Za-z0-9_.][A-Za-z0-9_.-]*$") - -# Keyword for identifying that the next bit of a checkpoint variable name is a -# slot name. May not be the local name of a checkpointable. Checkpoint names for -# slot variables look like: -# -# /<_OPTIMIZER_SLOTS_NAME>// -# -# Where is a full path from the checkpoint root to the -# variable being slotted for. -_OPTIMIZER_SLOTS_NAME = "-OPTIMIZER_SLOT" - - -def _assign_existing_variable(variable_to_restore, value_pointer): - """Set a variable from a _ValuePointer object.""" - base_type = variable_to_restore.dtype.base_dtype - with ops.colocate_with(variable_to_restore): - # TODO(allenl): Handle partitioned variables - value_to_restore, = io_ops.restore_v2( - prefix=value_pointer.save_path, - tensor_names=[value_pointer.checkpoint_key], - shape_and_slices=[""], - dtypes=[base_type], - name="checkpoint_initializer") - initializer_op = state_ops.assign(variable_to_restore, value_to_restore) - variable_to_restore._initializer_op = initializer_op # pylint:disable=protected-access - if value_pointer.session is not None: - value_pointer.session.run(initializer_op) - - -def _default_getter(name, shape, dtype, initializer=None, - partition_info=None, **kwargs): - """A pared-down version of get_variable which does not reuse variables.""" - dtype = dtypes.as_dtype(dtype) - shape_object = tensor_shape.as_shape(shape) - with ops.init_scope(): - if initializer is None: - initializer, initializing_from_value = ( - variable_scope._get_default_variable_store()._get_default_initializer( # pylint: disable=protected-access - name=name, shape=shape_object, dtype=dtype)) - else: - initializing_from_value = not callable(initializer) - # Same logic as get_variable - if initializing_from_value: - if shape is not None: - raise ValueError("If initializer is a constant, do not specify shape.") - initial_value = initializer - variable_dtype = None - else: - # Instantiate initializer if provided initializer is a type object. 
- if isinstance(initializer, type(init_ops.Initializer)): - initializer = initializer(dtype=dtype) - def initial_value(): - return initializer( - shape_object.as_list(), dtype=dtype, partition_info=partition_info) - variable_dtype = dtype.base_dtype - return resource_variable_ops.ResourceVariable( - initial_value=initial_value, - name=name, - dtype=variable_dtype, - **kwargs - ) - - -class Checkpointable(object): - """Manages variables and dependencies on other objects. - - To make reliable checkpoints, all `Checkpointable`s on which this object - depends must be registered in the constructor using `track_checkpointable` in - a deterministic order, and if possible they should be named. Variables may be - created using `add_variable` outside of the constructor and in any order, but - only these variables will be saved. - """ - - def __init__(self): - # A list of _CheckpointableReference objects. - self._checkpoint_dependencies = [] - # Maps names -> Checkpointable objects for named dependencies - self._dependency_names = {} - # Set of all tracked Checkpointables - self._already_tracked = set() - self._owned_variables = {} # local name -> variable object - self._deferred_restorations = {} # local name -> _VariableRestoration - # object - - def __setattr__(self, name, value): - """Support self.foo = checkpointable syntax. - - `self.foo = checkpointable` is equivalent to - `self.foo = self.track_checkpointable(checkpointable, name='foo')`. - - No new tracking if `value` is not a `Checkpointable`, or if `value` is - already being tracked (either because of an explicit `track_checkpointable` - or a previous `__setattr__`). - - Args: - name: The name of the property being set. - value: The new value for the property. - """ - # Give child classes (e.g. Network) priority, then track only if the object - # hasn't been added to _already_tracked. 
- super(Checkpointable, self).__setattr__(name, value) - if (isinstance(value, Checkpointable) - and value not in self._already_tracked): - self.track_checkpointable(value, name=name) - - def add_variable(self, name, shape=None, dtype=dtypes.float32, - initializer=None, **kwargs): - """Create a new variable object to be saved with this `Checkpointable`. - - If the user has requested that this object or another `Checkpointable` which - depends on this object be restored from a checkpoint (deferred loading - before variable object creation), `initializer` may be ignored and the value - from the checkpoint used instead. - - Args: - name: A name for the variable. Must be unique within this object. - shape: The shape of the variable. - dtype: The data type of the variable. - initializer: The initializer to use. Ignored if deferred loading has been - requested. - **kwargs: Passed to the ResourceVariable constructor. - - Returns: - The new variable object. - - Raises: - ValueError: If the variable name is not unique. - RuntimeError: If __init__ has not been called. - """ - if not hasattr(self, "_owned_variables"): - raise RuntimeError("Need to call Checkpointable.__init__ before adding " - "variables.") - if name in self._owned_variables: - raise ValueError( - ("A variable named '%s' already exists in this Checkpointable, but " - "Checkpointable.add_variable called to create another with " - "that name. Variable names must be unique within a Checkpointable " - "object.") % (name,)) - if "getter" in kwargs: - # Allow the getter to be overridden, typically because there is a need for - # compatibility with some other variable creation mechanism. This should - # be relatively uncommon in user code. 
- getter = kwargs.pop("getter") - else: - getter = _default_getter - deferred_restoration = self._deferred_restorations.pop(name, None) - if deferred_restoration is not None: - dtype = deferred_restoration.value_pointer.dtype - base_type = dtype.base_dtype - # TODO(allenl): Handle partitioned variables here too - with ops.init_scope(): - initializer, = io_ops.restore_v2( - prefix=deferred_restoration.value_pointer.save_path, - tensor_names=[deferred_restoration.value_pointer.checkpoint_key], - shape_and_slices=[""], - dtypes=[base_type], - name="checkpoint_initializer") - # We need to un-set the shape so get_variable doesn't complain, but we - # also need to set the static shape information on the initializer if - # possible so we don't get a variable with an unknown shape. - initializer.set_shape(shape) - # Un-set shape since we're using a constant initializer - shape = None - - new_variable = getter( - name=name, shape=shape, dtype=dtype, initializer=initializer, **kwargs) - if deferred_restoration is not None: - if deferred_restoration.value_pointer.session is not None: - deferred_restoration.value_pointer.session.run(new_variable.initializer) - for slot_restoration in deferred_restoration.slot_restorations: - strong_ref = slot_restoration.optimizer_ref() - if strong_ref is None: - # If the optimizer object has been garbage collected, there's no need - # to create the slot variable. - continue - strong_ref._process_slot_restoration( # pylint: disable=protected-access - slot_restoration, new_variable) - self._owned_variables[name] = new_variable - return new_variable - - def track_checkpointable(self, checkpointable, name): - """Declare a dependency on another `Checkpointable` object. - - Indicates that checkpoints for this object should include variables from - `checkpointable`. - - Variables in a checkpoint are mapped to `Checkpointable`s based on names. 
To - avoid breaking existing checkpoints when modifying a class, neither variable - names nor dependency names (the names passed to `track_checkpointable`) may - change. - - Args: - checkpointable: A `Checkpointable` which this object depends on. - name: A local name for `checkpointable`, used for loading checkpoints into - the correct objects. Python 2 identifiers are valid names, with the - addition of leading numerals, periods anywhere, and non-leading dashes. - Specifically names must match the regular expression - `^[A-Za-z0-9_.][A-Za-z0-9_.-]*$`. - - Returns: - `checkpointable`, for convenience when declaring a dependency and - assigning to a member variable in one statement. - - Raises: - RuntimeError: If __init__ was not called. - TypeError: If `checkpointable` does not inherit from `Checkpointable`. - ValueError: For invalid names. - """ - if not hasattr(self, "_checkpoint_dependencies"): - raise RuntimeError("Need to call Checkpointable.__init__ before calling " - "Checkpointable.track_checkpointable().") - if not isinstance(checkpointable, Checkpointable): - raise TypeError( - ("Checkpointable.track_checkpointable() passed type %s, not a " - "Checkpointable.") % (type(checkpointable),)) - if not _VALID_LOCAL_NAME.match(name): - raise ValueError( - ("Checkpointable names must match the regular expression '%s', but " - "got an invalid name '%s' instead.") % (_VALID_LOCAL_NAME.pattern, - name)) - if (name in self._dependency_names - and self._dependency_names[name] is not checkpointable): - raise ValueError( - ("Called Checkpointable.track_checkpointable() with name='%s', but " - "a Checkpointable with this name is already declared as a " - "dependency. 
Names must be unique.") % (name,)) - self._dependency_names[name] = checkpointable - self._checkpoint_dependencies.append( - _CheckpointableReference(name=name, ref=checkpointable)) - self._already_tracked.add(checkpointable) - return checkpointable - - def _process_restoration(self, restoration): - """Restore a variable and its slot variables (may be deferred).""" - variable_to_restore = self._owned_variables.get(restoration.name, None) - if variable_to_restore is not None: - # This variable already exists, so just do an assignment for this and any - # slot variables which depend on it. - _assign_existing_variable( - variable_to_restore, value_pointer=restoration.value_pointer) - for slot_restoration in restoration.slot_restorations: - strong_ref = slot_restoration.optimizer_ref() - if strong_ref is None: - continue - strong_ref._process_slot_restoration( # pylint: disable=protected-access - slot_restoration, variable_to_restore) - else: - # Save this restoration for later. This intentionally overwrites any - # previous deferred restorations, since that gives the same semantics as - # direct assignment. 
- self._deferred_restorations[restoration.name] = restoration - - def _process_slot_restoration(self, slot_restoration, variable): - """Restore a slot variable's value (creating it if necessary).""" - # TODO(allenl): Move this to Optimizer - assert isinstance(self, optimizer_lib.Optimizer) - named_slots = self._slot_dict(slot_restoration.slot_name) - variable_key = optimizer_lib._var_key(variable) # pylint: disable=protected-access - existing_slot_variable = named_slots.get(variable_key, None) - if existing_slot_variable is None: - base_dtype = slot_restoration.value_pointer.dtype.base_dtype - initializer, = io_ops.restore_v2( - prefix=slot_restoration.value_pointer.save_path, - tensor_names=[slot_restoration.value_pointer.checkpoint_key], - shape_and_slices=[""], - dtypes=[base_dtype], - name="checkpoint_initializer") - new_slot_variable = slot_creator.create_slot(variable, initializer, - slot_restoration.slot_name) - if slot_restoration.value_pointer.session is not None: - slot_restoration.value_pointer.session.run( - new_slot_variable.initializer) - named_slots[variable_key] = new_slot_variable - else: - _assign_existing_variable( - existing_slot_variable, value_pointer=slot_restoration.value_pointer) - - @property - def checkpoint_dependencies(self): - """Other `Checkpointable` objects on which this object depends.""" - return self._checkpoint_dependencies - - -def _breadth_first_checkpointable_traversal(root_checkpointable): - """Find shortest paths to all variables owned by dependencies of root.""" - bfs_sorted = [] - root_checkpointable_reference = _CheckpointableReference( - name=None, ref=root_checkpointable) - to_visit = collections.deque([root_checkpointable_reference]) - path_to_root = {root_checkpointable_reference: ()} - while to_visit: - current_checkpointable = to_visit.popleft() - bfs_sorted.append(current_checkpointable) - for child_checkpointable in ( - current_checkpointable.ref.checkpoint_dependencies): - if child_checkpointable not in 
path_to_root: - path_to_root[child_checkpointable] = ( - path_to_root[current_checkpointable] + (child_checkpointable,)) - to_visit.append(child_checkpointable) - return bfs_sorted, path_to_root - - -def _object_prefix_from_path(path_to_root): - return "/".join( - (checkpointable.name for checkpointable in path_to_root)) - - -def _escape_variable_name(variable_name): - # We need to support slashes in variable names for compatibility, since this - # naming scheme is being patched in to things like Layer.add_variable where - # slashes were previously accepted. We also want to use slashes to indicate - # edges traversed to reach the variable, so we escape forward slashes in - # variable names. - return variable_name.replace("_S_", "_S_.").replace(r"/", r"_S__") - - -def _variable_naming_for_object(path_to_root): - """Make a function for naming variables in an object.""" - # Name non-slot variables: - # - # / - # - # is not necessarily unique, but this is fine since we also - # save the graph of `Checkpointable`s with the checkpoint. Even if this path - # no longer exists because of a change in the Python program, we can look up - # the `Checkpointable` which owns the variable in the checkpoint's graph and - # use another path if one still exists. - - object_prefix = _object_prefix_from_path(path_to_root) - if object_prefix: - object_prefix += "/" - - def _name_single_variable(local_name): - """Names a variable within an object.""" - return object_prefix + _escape_variable_name(local_name) - - return _name_single_variable - - -def _slot_variable_naming_for_optimizer(optimizer, path_to_root): - """Make a function for naming slot variables in an optimizer.""" - # Name slot variables: - # - # /<_OPTIMIZER_SLOTS_NAME>// - # - # where is exactly the checkpoint name used for the original - # variable, including the path from the checkpoint root and the local name in - # the object which owns it. 
Note that we only save slot variables if the - # variable it's slotting for is also being saved. - - optimizer_identifier = "/%s/%s/" % (_OPTIMIZER_SLOTS_NAME, - _object_prefix_from_path(path_to_root)) - - def _name_slot_variable(variable_path, slot_name): - """With an optimizer specified, name a slot variable.""" - - if not _VALID_LOCAL_NAME.match(slot_name): - # Slot variable names include the name of the slot. We need to - # validate that part of the name to be sure that the checkpoint name - # is a valid name scope name. - raise ValueError( - ("Could not save slot variables for optimizer %s, because its " - "slot name has invalid characters (got '%s', was expecting it " - "to match the regular expression '%s').") % - (optimizer, slot_name, _VALID_LOCAL_NAME.pattern)) - - return variable_path + optimizer_identifier + slot_name - - return _name_slot_variable - - -def _serialize_non_slot_variables(checkpointable_objects, path_to_root, - object_graph_proto): - """Name non-slot variables and add them to `object_graph_proto`.""" - named_variables = {} - non_slot_variables = [] - checkpoint_node_ids = {} - - for checkpoint_id, checkpointable in enumerate(checkpointable_objects): - checkpoint_node_ids[checkpointable] = checkpoint_id - - for checkpoint_id, checkpointable in enumerate(checkpointable_objects): - naming_scheme = _variable_naming_for_object(path_to_root[checkpointable]) - object_proto = object_graph_proto.nodes.add() - for (local_name, owned_variable) in sorted( - checkpointable.ref._owned_variables.items(), # pylint: disable=protected-access - key=lambda x: x[0]): - variable_name = naming_scheme(local_name) - named_variables[variable_name] = owned_variable - non_slot_variables.append(( - variable_name, # The variable's full checkpoint name - owned_variable, # The variable object - local_name, # The variable's local name - checkpoint_id)) # The checkpoint ID of the node which owns this - # variable. 
- variable_proto = object_proto.variables.add() - variable_proto.local_name = local_name - variable_proto.checkpoint_key = variable_name - # Figure out the name-based Saver's name for this variable. - saver_dict = saver_lib.BaseSaverBuilder.OpListToDict( - [owned_variable], convert_variable_to_tensor=False) - variable_full_name, = saver_dict.keys() - variable_proto.full_name = variable_full_name - - for child in checkpointable.ref.checkpoint_dependencies: - child_proto = object_proto.children.add() - child_proto.node_id = checkpoint_node_ids[child] - child_proto.local_name = child.name - return named_variables, non_slot_variables - - -def _serialize_slot_variables(checkpointable_objects, path_to_root, - non_slot_variables, object_graph_proto): - """Name slot variables and add them to `object_graph_proto`.""" - named_slot_variables = {} - for optimizer_checkpoint_id, checkpointable_ref in enumerate( - checkpointable_objects): - if isinstance(checkpointable_ref.ref, optimizer_lib.Optimizer): - optimizer_object_proto = object_graph_proto.nodes[optimizer_checkpoint_id] - naming_scheme = _slot_variable_naming_for_optimizer( - optimizer=checkpointable_ref.ref, - path_to_root=path_to_root[checkpointable_ref]) - slot_names = checkpointable_ref.ref.get_slot_names() - for (variable_path, original_variable, original_variable_local_name, - original_node_checkpoint_id) in non_slot_variables: - for slot_name in slot_names: - slot_variable = checkpointable_ref.ref.get_slot( - original_variable, slot_name) - if slot_variable is not None: - checkpoint_name = naming_scheme( - variable_path=variable_path, slot_name=slot_name) - named_slot_variables[checkpoint_name] = slot_variable - slot_variable_proto = optimizer_object_proto.slot_variables.add() - slot_variable_proto.slot_name = slot_name - slot_variable_proto.checkpoint_key = checkpoint_name - # Figure out the name-based Saver's name for this variable. 
- saver_dict = saver_lib.BaseSaverBuilder.OpListToDict( - [slot_variable], convert_variable_to_tensor=False) - slot_variable_full_name, = saver_dict.keys() - slot_variable_proto.full_name = slot_variable_full_name - slot_variable_proto.original_variable_local_name = ( - original_variable_local_name) - slot_variable_proto.original_variable_node_id = ( - original_node_checkpoint_id) - return named_slot_variables - - -# TODO(allenl): Convenience utility for saving multiple objects (i.e. construct -# a root Checkpointable if passed a list of Checkpointables). -def _serialize_object_graph(root_checkpointable): - """Determine checkpoint keys for variables and build a serialized graph. - - Non-slot variables are keyed based on a shortest path from the root saveable - to the object which owns the variable (i.e. the one which called - `Checkpointable.add_variable` to create it). - - Slot variables are keyed based on a shortest path to the variable being - slotted for, a shortest path to their optimizer, and the slot name. - - Args: - root_checkpointable: A `Checkpointable` object whose variables (including - the variables of dependencies, recursively) should be saved. - - Returns: - A tuple of (named_variables, object_graph_proto): - named_variables: A dictionary mapping names to variable objects. - object_graph_proto: A CheckpointableObjectGraph protocol buffer containing - the serialized object graph and variable references. - - Raises: - ValueError: If there are invalid characters in an optimizer's slot names. - """ - checkpointable_objects, path_to_root = ( - _breadth_first_checkpointable_traversal(root_checkpointable)) - object_graph_proto = ( - checkpointable_object_graph_pb2.CheckpointableObjectGraph()) - - # Gather non-slot variables. - named_variables, non_slot_variables = _serialize_non_slot_variables( - checkpointable_objects, path_to_root, object_graph_proto) - - # Gather slot variables which are associated with variables gathered above. 
- named_slot_variables = _serialize_slot_variables( - checkpointable_objects, path_to_root, non_slot_variables, - object_graph_proto) - - named_variables.update(named_slot_variables) - return named_variables, object_graph_proto - - -def _set_reference(reference_proto_table, key, checkpointable, parent, - object_id_map): - """Record a checkpoint<->object correspondence, with error checking. - - Args: - reference_proto_table: Map from names or numbers to `ObjectReference` protos - within the parent object. - key: Either a numeric or string identifier for the reference. - checkpointable: The object to record a correspondence for. - parent: The parent Python object, for creating a useful error message. - object_id_map: The map from `node_id` to Python object in which to record - the reference. - Returns: - The `node_id` of the Object proto corresponding to the specified Python - object. - Raises: - AssertionError: If another object is already bound to the `Object` proto. - """ - reference_proto = reference_proto_table[key] - set_reference = object_id_map.setdefault(reference_proto.node_id, - checkpointable) - if set_reference is not checkpointable: - raise AssertionError( - ("Unable to load the checkpoint into this object graph. Either " - "the Checkpointable object references in the Python program " - "have changed in an incompatible way, or the checkpoint was " - "generated in an incompatible program.\n\nTwo checkpoint " - "references (one being '%s' in %s) resolved to different " - "objects (%s and %s).") % (key, parent, set_reference, - checkpointable)) - return reference_proto.node_id - - -def _checkpoint_object_id_map(root_checkpointable, object_graph_proto): - """Match a checkpointed object graph to a Python object graph. - - Args: - root_checkpointable: A Checkpointable object. - object_graph_proto: A CheckpointableObjectGraph protocol buffer representing - a serialized object graph. 
- Returns: - A dictionary mapping from checkpoint node ids (indices into - `object_graph_proto.nodes`) to `Checkpointable` objects which are - dependencies of `root_checkpointable`. - """ - node_list = object_graph_proto.nodes - # Queue of (checkpointable object, node id) - to_visit = collections.deque([(root_checkpointable, 0)]) - object_id_map = {0: root_checkpointable} - seen = set() - while to_visit: - checkpointable, node_id = to_visit.popleft() - object_proto = node_list[node_id] - named_children = {} - for child_reference in object_proto.children: - if child_reference.local_name: - named_children[child_reference.local_name] = child_reference - else: - raise AssertionError( - ("The checkpointed object graph contains a reference without " - "a name (corrupted?). The reference was from the node %s.") - % (object_proto,)) - - for checkpointable_reference in checkpointable._checkpoint_dependencies: # pylint: disable=protected-access - child_node_id = _set_reference( - reference_proto_table=named_children, - key=checkpointable_reference.name, - checkpointable=checkpointable_reference.ref, - parent=checkpointable, - object_id_map=object_id_map) - if child_node_id not in seen: - seen.add(child_node_id) - to_visit.append((checkpointable_reference.ref, child_node_id)) - - return object_id_map - - -_ValuePointer = collections.namedtuple( - "_ValuePointer", - [ - # Information needed to look up the value to restore. - "save_path", - "checkpoint_key", - "dtype", - # The session to use when restoring (None when executing eagerly) - "session", - ]) - -_SlotVariableRestoration = collections.namedtuple( - "_SlotVariableRestoration", - [ - # A weak reference to the Optimizer object - "optimizer_ref", - # The slot name - "slot_name", - # The _ValuePointer to use when restoring - "value_pointer", - ]) - -_VariableRestoration = collections.namedtuple( - "_VariableRestoration", - [ - # The variable's (local) name. 
- "name", - # _SlotVariableRestoration objects indicating slot variables which - # should be created once this variable has been restored. - "slot_restorations", - # The _ValuePointer to use when restoring - "value_pointer", - ]) - - -def _gather_restorations(object_graph_proto, save_path, object_id_map, - dtype_map, session): - """Iterate over variables to restore, matching with Checkpointable objects.""" - variable_to_slot_restorations = {} - for node_id, node in enumerate(object_graph_proto.nodes): - for slot_variable in node.slot_variables: - original_variable_key = (slot_variable.original_variable_node_id, - slot_variable.original_variable_local_name) - variable_to_slot_restorations.setdefault( - original_variable_key, []).append( - _SlotVariableRestoration( - optimizer_ref=weakref.ref(object_id_map[node_id]), - slot_name=slot_variable.slot_name, - value_pointer=_ValuePointer( - save_path=save_path, - checkpoint_key=slot_variable.checkpoint_key, - dtype=dtype_map[slot_variable.checkpoint_key], - session=session))) - - for node_id, node in enumerate(object_graph_proto.nodes): - for variable in node.variables: - slots_key = (node_id, variable.local_name) - variable_restore = _VariableRestoration( - name=variable.local_name, - slot_restorations=variable_to_slot_restorations.get(slots_key, []), - value_pointer=_ValuePointer( - save_path=save_path, - checkpoint_key=variable.checkpoint_key, - dtype=dtype_map[variable.checkpoint_key], - session=session)) - yield variable_restore, object_id_map[node_id] - - -def save(file_prefix, root_checkpointable, global_step=None, session=None): - """Save a training checkpoint. - - Args: - file_prefix: A prefix to use for the checkpoint filenames - (/path/to/directory/and_a_prefix). Names are generated based on this - prefix and the global step, if provided. - root_checkpointable: A Checkpointable object to save. The checkpoint - includes variables created by this object and any Checkpointable objects - it depends on. 
- global_step: An integer variable or Tensor, used to number - checkpoints. Typically this value is saved along with other variables in - training checkpoints, which will happen automatically if it was created by - `root_checkpointable` or one of its dependencies (via - `Checkpointable.add_variable`). - session: The session to evaluate variables in. Ignored when executing - eagerly. If not provided when graph building, the default session is used. - - Returns: - The full path to the checkpoint. - - Currently also returns the serialized object graph proto, but that will go - away once it's saved with the checkpoint. - """ - named_variables, serialized_graph = _serialize_object_graph( - root_checkpointable) - if context.in_graph_mode(): - if session is None: - session = ops.get_default_session() - else: - session = None - with ops.device("/device:CPU:0"): - save_path = saver_lib.Saver(var_list=named_variables).save( - sess=session, - save_path=file_prefix, - write_meta_graph=False, - global_step=global_step) - # TODO(allenl): Save the graph with the checkpoint, then returning it and - # taking it as an argument to restore won't be necessary. - return serialized_graph, save_path - - -# NOTE: Will be restore(file_prefix, root_checkpointable) once the object graph -# is saved with the checkpoint. -def restore(save_path, root_checkpointable, object_graph_proto, session=None): - """Restore a training checkpoint. - - Restores the values of variables created with `Checkpointable.add_variable` in - the dependency graph of `root_checkpointable`. Either assigns values - immediately (if variables to restore have been created already), or defers - restoration until the variables are created. - - When building a graph, restorations are executed in the default session if - `session` is `None`. Variable initializers read checkpointed values. - - Args: - save_path: The path to the checkpoint, as returned by `save` or - `tf.train.latest_checkpoint`. 
If None (as when there is no latest - checkpoint for `tf.train.latest_checkpoint` to return), does nothing. - root_checkpointable: The root of the object graph to restore. Variables to - restore need not have been created yet, but all dependencies on other - Checkpointable objects should already be declared. Objects in the - dependency graph are matched to objects in the checkpointed graph, and - matching objects have their variables restored (or the checkpointed values - saved for eventual restoration when the variable is created). - object_graph_proto: (Temporary) the checkpointed object graph. This will - eventually be saved with the checkpoint, and will not be part of the final - API. - session: The session to evaluate assignment ops in. Ignored when executing - eagerly. If not provided when graph building, the default session is used. - """ - if save_path is None: - return - object_id_map = _checkpoint_object_id_map(root_checkpointable, - object_graph_proto) - reader = training.NewCheckpointReader(save_path) - dtype_map = reader.get_variable_to_dtype_map() - if context.in_graph_mode(): - if session is None: - session = ops.get_default_session() - else: - session = None - for restoration, checkpointable in _gather_restorations( - object_graph_proto, save_path, object_id_map, dtype_map, session=session): - checkpointable._process_restoration(restoration) # pylint: disable=protected-access - diff --git a/tensorflow/contrib/eager/python/checkpointable_test.py b/tensorflow/contrib/eager/python/checkpointable_test.py deleted file mode 100644 index f7bc155dec..0000000000 --- a/tensorflow/contrib/eager/python/checkpointable_test.py +++ /dev/null @@ -1,497 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import os - -import six - -from tensorflow.contrib.eager.python import checkpointable -from tensorflow.contrib.eager.python import network as network_lib -from tensorflow.python.eager import context -from tensorflow.python.eager import test -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util -from tensorflow.python.layers import base -from tensorflow.python.layers import core -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import resource_variable_ops -from tensorflow.python.ops import state_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables -from tensorflow.python.training import adam -from tensorflow.python.training import saver as core_saver -from tensorflow.python.training import training_util - - -class CheckpointableDenseLayer(core.Dense, checkpointable.Checkpointable): - - def __init__(self, *args, **kwargs): - checkpointable.Checkpointable.__init__(self) - core.Dense.__init__(self, *args, **kwargs) - - def add_variable(self, name, shape, **kwargs): - # Calls both Checkpointable.add_variable and Layer.add_variable. 
Eventually - # Layer.add_variable should inherit from Checkpointable and simply call - # super and then do post-processing. - return checkpointable.Checkpointable.add_variable( - self, - name=name, - shape=shape, - getter=functools.partial(core.Dense.add_variable, self), - **kwargs) - - -# pylint: disable=not-callable -class CheckpointableNetwork(network_lib.Network, checkpointable.Checkpointable): - - def __init__(self): - network_lib.Network.__init__(self) - checkpointable.Checkpointable.__init__(self) - - def __setattr__(self, name, value): - if isinstance(value, base.Layer) and value not in self._already_tracked: - self.track_layer(value, name=name) - # Checkpointable is next in the method resolution order, so this will catch - # Checkpointable objects which aren't Layers. - super(CheckpointableNetwork, self).__setattr__(name, value) - - def track_layer(self, layer, name): - self.track_checkpointable(layer, name=name) - return super(CheckpointableNetwork, self).track_layer(layer) - - -class CheckpointableAdam(adam.AdamOptimizer, checkpointable.Checkpointable): - - def __init__(self, *args, **kwargs): - checkpointable.Checkpointable.__init__(self) - adam.AdamOptimizer.__init__(self, *args, **kwargs) - - # NOTE: Copied from Optimizer with modifications to use add_variable - # for non-slot variables. These contortions are necessary to maintain - # checkpoint compatibility with variable.name based saving. - # TODO(allenl): Make this cleaner. 
- def _create_non_slot_variable(self, initial_value, name, colocate_with): - """Add an extra variable, not associated with a slot.""" - if context.in_graph_mode(): - graph = colocate_with.graph - else: - graph = None - - key = (name, graph) - v = self._non_slot_dict.get(key, None) - if v is None: - with ops.colocate_with(colocate_with): - def _variable_getter(name, shape, dtype, initializer): - del shape, dtype # not used, but there for compatibility - return variable_scope.variable( - name=name, initial_value=initializer, trainable=False) - - initial_value = ops.convert_to_tensor(initial_value) - v = self.add_variable( - name=name, - shape=initial_value.get_shape(), - initializer=initial_value, - getter=_variable_getter) - - self._non_slot_dict[key] = v - - return v - - -class NonLayerCheckpointable(checkpointable.Checkpointable): - - def __init__(self): - super(NonLayerCheckpointable, self).__init__() - self.a_variable = self.add_variable(name="a_variable", shape=[]) - - -class MyNetwork(CheckpointableNetwork): - """A concrete Network for testing.""" - - def __init__(self): - super(MyNetwork, self).__init__() - self._named_dense = CheckpointableDenseLayer(1, use_bias=True) - self._via_track_layer = self.track_layer( - CheckpointableDenseLayer(1, use_bias=False), name="via_track_layer") - # We can still track Checkpointables which aren't Layers. - self._non_layer = NonLayerCheckpointable() - - def call(self, values): - return self._via_track_layer(self._named_dense(values)) - - -class Root(checkpointable.Checkpointable): - """A stand-in for a Trainer class.""" - - def __init__(self, optimizer, network): - super(Root, self).__init__() - self._optimizer = optimizer - self._network = self.track_checkpointable(network, "network") - self._global_step = None - - @property - def global_step(self): - if self._global_step is None: - # Get the default create_global_step utility to actually call - # self.add_variable, by setting a custom creator. 
- def _owned_variable_as_creator( - next_creator, initial_value, **kwargs): - def _creator_as_getter(initializer, **kwargs): - return next_creator(initial_value=initializer, **kwargs) - return self.add_variable( - getter=_creator_as_getter, initializer=initial_value, shape=[], - **kwargs) - - with variable_scope.variable_creator_scope( - _owned_variable_as_creator): - self._global_step = training_util.create_global_step() - return self._global_step - - -class InterfaceTests(test.TestCase): - - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) - def testAddVariable(self): - obj = NonLayerCheckpointable() - with self.assertRaisesRegexp(ValueError, "do not specify shape"): - obj.add_variable( - name="shape_specified_twice", shape=[], initializer=1) - constant_initializer = obj.add_variable( - name="constant_initializer", initializer=1) - with variable_scope.variable_scope("some_variable_scope"): - ones_initializer = obj.add_variable( - name="ones_initializer", - shape=[2], - initializer=init_ops.ones_initializer(dtype=dtypes.float32)) - bare_initializer = obj.add_variable( - name="bare_initializer", - shape=[2, 2], - dtype=dtypes.float64, - initializer=init_ops.zeros_initializer) - - # Even in graph mode, there are no naming conflicts between objects, only - # naming conflicts within an object. - other_duplicate = resource_variable_ops.ResourceVariable( - name="duplicate", initial_value=1.) 
- duplicate = obj.add_variable(name="duplicate", shape=[]) - with self.assertRaisesRegexp(ValueError, "'duplicate' already exists"): - obj.add_variable(name="duplicate", shape=[]) - - if context.in_graph_mode(): - self.evaluate(variables.global_variables_initializer()) - self.assertEqual("constant_initializer:0", constant_initializer.name) - self.assertEqual(1, self.evaluate(constant_initializer)) - self.assertEqual("some_variable_scope/ones_initializer:0", - ones_initializer.name) - self.assertAllEqual([1, 1], self.evaluate(ones_initializer)) - self.assertAllEqual([[0., 0.], - [0., 0.]], self.evaluate(bare_initializer)) - self.assertEqual("a_variable:0", obj.a_variable.name) - self.assertEqual("duplicate:0", other_duplicate.name) - if context.in_graph_mode(): - # The .name attribute may be globally influenced, but the checkpoint name - # won't be (tested below). - self.assertEqual("duplicate_1:0", duplicate.name) - else: - # When executing eagerly, there's no uniquification of variable names. The - # checkpoint name will be the same. - self.assertEqual("duplicate:0", duplicate.name) - named_variables, _ = checkpointable._serialize_object_graph(obj) - expected_checkpoint_names = ( - "a_variable", - "bare_initializer", - "constant_initializer", - "duplicate", - "ones_initializer", - ) - six.assertCountEqual( - self, expected_checkpoint_names, named_variables.keys()) - - def testInitNotCalled(self): - - class NoInit(checkpointable.Checkpointable): - - def __init__(self): - pass - - with self.assertRaisesRegexp(RuntimeError, "__init__"): - NoInit().add_variable("var", shape=[]) - - -class CheckpointingTests(test.TestCase): - - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) - def testNamingWithOptimizer(self): - input_value = constant_op.constant([[3.]]) - network = MyNetwork() - # A nuisance Network using the same optimizer. Its slot variables should not - # go in the checkpoint, since it is never depended on. 
- other_network = MyNetwork() - optimizer = CheckpointableAdam(0.001) - root_checkpointable = Root(optimizer=optimizer, network=network) - if context.in_eager_mode(): - optimizer.minimize( - lambda: network(input_value), - global_step=root_checkpointable.global_step) - optimizer.minimize( - lambda: other_network(input_value), - global_step=root_checkpointable.global_step) - else: - train_op = optimizer.minimize( - network(input_value), global_step=root_checkpointable.global_step) - optimizer.minimize( - other_network(input_value), - global_step=root_checkpointable.global_step) - self.evaluate(variables.global_variables_initializer()) - self.evaluate(train_op) - named_variables, serialized_graph = checkpointable._serialize_object_graph( - root_checkpointable) - expected_checkpoint_names = ( - # Created in the root node, so no prefix. - "global_step", - # No name provided to track_checkpointable(), so the position is used - # instead (one-based). - "network/via_track_layer/kernel", - # track_checkpointable() with a name provided, so that's used - "network/_named_dense/kernel", - "network/_named_dense/bias", - # non-Layer dependency of the network - "network/_non_layer/a_variable", - # The optimizer creates two non-slot variables - "_optimizer/beta1_power", - "_optimizer/beta2_power", - # Slot variables - "network/via_track_layer/kernel/-OPTIMIZER_SLOT/_optimizer/m", - "network/via_track_layer/kernel/-OPTIMIZER_SLOT/_optimizer/v", - "network/_named_dense/kernel/-OPTIMIZER_SLOT/_optimizer/m", - "network/_named_dense/kernel/-OPTIMIZER_SLOT/_optimizer/v", - "network/_named_dense/bias/-OPTIMIZER_SLOT/_optimizer/m", - "network/_named_dense/bias/-OPTIMIZER_SLOT/_optimizer/v", - ) - six.assertCountEqual(self, expected_checkpoint_names, - named_variables.keys()) - # Check that we've mapped to the right variable objects (not exhaustive) - self.assertEqual("global_step:0", named_variables["global_step"].name) - 
self.assertEqual("my_network/checkpointable_dense_layer_1/kernel:0", - named_variables["network/via_track_layer/kernel"].name) - self.assertEqual("my_network/checkpointable_dense_layer/kernel:0", - named_variables["network/_named_dense/kernel"].name) - self.assertEqual("beta1_power:0", - named_variables["_optimizer/beta1_power"].name) - self.assertEqual("beta2_power:0", - named_variables["_optimizer/beta2_power"].name) - # Spot check the generated protocol buffers. - self.assertEqual("_optimizer", - serialized_graph.nodes[0].children[0].local_name) - optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[ - 0].node_id] - self.assertEqual("beta1_power", optimizer_node.variables[0].local_name) - self.assertEqual("beta1_power", optimizer_node.variables[0].full_name) - # Variable ordering is arbitrary but deterministic (alphabetized) - self.assertEqual( - "bias", optimizer_node.slot_variables[0].original_variable_local_name) - original_variable_owner = serialized_graph.nodes[ - optimizer_node.slot_variables[0].original_variable_node_id] - self.assertEqual("network/_named_dense/bias", - original_variable_owner.variables[0].checkpoint_key) - self.assertEqual("bias", original_variable_owner.variables[0].local_name) - self.assertEqual("m", optimizer_node.slot_variables[0].slot_name) - self.assertEqual("network/_named_dense/bias/-OPTIMIZER_SLOT/_optimizer/m", - optimizer_node.slot_variables[0].checkpoint_key) - # We strip off the :0 suffix, as variable.name-based saving does. 
- self.assertEqual("my_network/checkpointable_dense_layer/bias/Adam", - optimizer_node.slot_variables[0].full_name) - self.assertEqual("my_network/checkpointable_dense_layer/bias/Adam:0", - optimizer.get_slot( - var=named_variables["network/_named_dense/bias"], - name="m").name) - - @test_util.run_in_graph_and_eager_modes() - def testSaveRestore(self): - network = MyNetwork() - optimizer = CheckpointableAdam(0.001) - root_checkpointable = Root(optimizer=optimizer, network=network) - input_value = constant_op.constant([[3.]]) - if context.in_eager_mode(): - optimizer.minimize( - lambda: network(input_value), - global_step=root_checkpointable.global_step) - else: - train_op = optimizer.minimize( - network(input_value), global_step=root_checkpointable.global_step) - self.evaluate(variables.global_variables_initializer()) - self.evaluate(train_op) - prefix = os.path.join(self.get_temp_dir(), "ckpt") - self.evaluate(state_ops.assign(network._named_dense.variables[1], [42.])) - m_bias_slot = optimizer.get_slot(network._named_dense.variables[1], "m") - self.evaluate(state_ops.assign(m_bias_slot, [1.5])) - serialized_graph, save_path = checkpointable.save( - file_prefix=prefix, - root_checkpointable=root_checkpointable, - global_step=root_checkpointable.global_step) - self.evaluate(state_ops.assign(network._named_dense.variables[1], [43.])) - self.evaluate(state_ops.assign(root_checkpointable.global_step, 3)) - optimizer_variables = self.evaluate(optimizer.variables()) - self.evaluate(state_ops.assign(m_bias_slot, [-2.])) - # Immediate restoration - checkpointable.restore( - save_path=save_path, - root_checkpointable=root_checkpointable, - object_graph_proto=serialized_graph) - self.assertAllEqual([42.], self.evaluate(network._named_dense.variables[1])) - self.assertAllEqual(1, self.evaluate(root_checkpointable.global_step)) - self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) - with ops.Graph().as_default(): - on_create_network = MyNetwork() - on_create_optimizer = 
CheckpointableAdam(0.001) - on_create_root = Root( - optimizer=on_create_optimizer, network=on_create_network) - with self.test_session(graph=ops.get_default_graph()): - # Deferred restoration - checkpointable.restore( - save_path=save_path, - root_checkpointable=on_create_root, - object_graph_proto=serialized_graph) - on_create_network(constant_op.constant([[3.]])) # create variables - self.assertAllEqual(1, self.evaluate(on_create_root.global_step)) - self.assertAllEqual([42.], - self.evaluate( - on_create_network._named_dense.variables[1])) - on_create_m_bias_slot = on_create_optimizer.get_slot( - on_create_network._named_dense.variables[1], "m") - # Optimizer slot variables are created when the original variable is - # restored. - self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) - # beta1_power and beta2_power haven't been created yet, but everything - # else matches. - self.assertAllEqual(optimizer_variables[2:], - self.evaluate(on_create_optimizer.variables())) - on_create_optimizer._create_slots( - [resource_variable_ops.ResourceVariable([1.])]) - beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators() - self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power)) - self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power)) - - def testDeferredRestorationUsageEager(self): - """An idiomatic eager execution example.""" - num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - latest_object_graph = None # Will be saved with the checkpoint eventually. 
- for training_continuation in range(3): - with ops.Graph().as_default(): - network = MyNetwork() - optimizer = CheckpointableAdam(0.001) - root = Root(optimizer=optimizer, network=network) - checkpointable.restore( - save_path=core_saver.latest_checkpoint(checkpoint_directory), - root_checkpointable=root, - object_graph_proto=latest_object_graph) - for _ in range(num_training_steps): - # TODO(allenl): Use a Dataset and serialize/checkpoint it. - input_value = constant_op.constant([[3.]]) - optimizer.minimize( - lambda: network(input_value), # pylint: disable=cell-var-from-loop - global_step=root.global_step) - latest_object_graph, _ = checkpointable.save( - file_prefix=checkpoint_prefix, - root_checkpointable=root) - self.assertEqual((training_continuation + 1) * num_training_steps, - root.global_step.numpy()) - - def testUsageGraph(self): - """Expected usage when graph building.""" - with context.graph_mode(): - num_training_steps = 10 - checkpoint_directory = self.get_temp_dir() - checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - latest_object_graph = None - for training_continuation in range(3): - with ops.Graph().as_default(): - network = MyNetwork() - optimizer = CheckpointableAdam(0.001) - root = Root(optimizer=optimizer, network=network) - input_value = constant_op.constant([[3.]]) - train_op = optimizer.minimize( - network(input_value), - global_step=root.global_step) - init_op = variables.global_variables_initializer() - checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) - with self.test_session(graph=ops.get_default_graph()) as session: - if checkpoint_path is None: - self.assertEqual(0, training_continuation) - session.run(init_op) - # Another alternative would be to run initializers automatically - # if no checkpoint is being loaded. This would make deferred - # loading a bit more useful with graph execution. 
- else: - checkpointable.restore( - save_path=checkpoint_path, - root_checkpointable=root, - object_graph_proto=latest_object_graph, - session=session) - for _ in range(num_training_steps): - session.run(train_op) - latest_object_graph, _ = checkpointable.save( - file_prefix=checkpoint_prefix, - root_checkpointable=root, - session=session) - self.assertEqual((training_continuation + 1) * num_training_steps, - session.run(root.global_step)) - - def _get_checkpoint_name(self, name): - root = checkpointable.Checkpointable() - root.add_variable(name=name, shape=[1, 2], dtype=dtypes.float64) - named_variables, _ = checkpointable._serialize_object_graph(root) - checkpoint_name, = named_variables.keys() - with ops.name_scope("root/" + checkpoint_name): - pass # Make sure we can use this as an op name if we prefix it. - return checkpoint_name - - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) - def testVariableNameEscaping(self): - self.assertEqual(r"a_S__b_S__c", self._get_checkpoint_name(r"a/b/c")) - self.assertEqual(r"b", self._get_checkpoint_name(r"b")) - self.assertEqual(r"c_S__", self._get_checkpoint_name(r"c/")) - self.assertEqual(r"d_S___S_._", self._get_checkpoint_name(r"d/_S__")) - - @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) - def testNumberedPath(self): - root = checkpointable.Checkpointable() - leaf = checkpointable.Checkpointable() - root.track_checkpointable(leaf, name="leaf") - leaf.add_variable(name="v", shape=[]) - named_variables, _ = checkpointable._serialize_object_graph(root) - variable_name, = named_variables.keys() - self.assertEqual(r"leaf/v", variable_name) - - @test_util.run_in_graph_and_eager_modes() - def testLocalNameValidation(self): - root = checkpointable.Checkpointable() - leaf = checkpointable.Checkpointable() - with self.assertRaisesRegexp(ValueError, "invalid name"): - # Leading dashes are reserved, which avoids conflicts with un-named edges - # in paths and the optimizer slots 
identifier. - root.track_checkpointable(leaf, name="-unnamed-12") - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py new file mode 100644 index 0000000000..d3c57bc606 --- /dev/null +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -0,0 +1,413 @@ +"""Utilities for working with Checkpointable objects.""" +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + +from tensorflow.contrib.eager.proto import checkpointable_object_graph_pb2 +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import io_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.training import checkpointable as core_checkpointable +from tensorflow.python.training import optimizer as optimizer_lib +from tensorflow.python.training import saver as saver_lib + + +_ESCAPE_CHAR = "." # For avoiding conflicts with user-specified names. + +# Keyword for identifying that the next bit of a checkpoint variable name is a +# slot name. Checkpoint names for slot variables look like: +# +# /<_OPTIMIZER_SLOTS_NAME>// +# +# Where is a full path from the checkpoint root to the +# variable being slotted for. +_OPTIMIZER_SLOTS_NAME = _ESCAPE_CHAR + "OPTIMIZER_SLOT" +# Keyword for separating the path to an object from the name of an +# attribute in checkpoint names. Used like: +# /<_OBJECT_ATTRIBUTES_NAME>/ +_OBJECT_ATTRIBUTES_NAME = _ESCAPE_CHAR + "ATTRIBUTES" +# Key where the object graph proto is saved in a TensorBundle +_OBJECT_GRAPH_PROTO_KEY = "_CHECKPOINTABLE_OBJECT_GRAPH" + + +# TODO(allenl): If this ends up in a public API, consider adding LINT.IfChange +# or consolidating the implementation with get_variable. 
+def _default_getter(name, shape, dtype, initializer=None, + partition_info=None, **kwargs): + """A pared-down version of get_variable which does not reuse variables.""" + dtype = dtypes.as_dtype(dtype) + shape_object = tensor_shape.as_shape(shape) + with ops.init_scope(): + if initializer is None: + initializer, initializing_from_value = ( + variable_scope._get_default_variable_store()._get_default_initializer( # pylint: disable=protected-access + name=name, shape=shape_object, dtype=dtype)) + else: + initializing_from_value = not callable(initializer) + # Same logic as get_variable + variable_dtype = dtype.base_dtype + if initializing_from_value: + if shape is not None: + raise ValueError("If initializer is a constant, do not specify shape.") + initial_value = initializer + else: + # Instantiate initializer if provided initializer is a type object. + if isinstance(initializer, type(init_ops.Initializer)): + initializer = initializer(dtype=dtype) + def initial_value(): + return initializer( + shape_object.as_list(), dtype=dtype, partition_info=partition_info) + return resource_variable_ops.ResourceVariable( + initial_value=initial_value, + name=name, + dtype=variable_dtype, + **kwargs + ) + + +def add_variable(checkpointable, name, shape=None, dtype=dtypes.float32, + initializer=None): + """Add a variable to a Checkpointable with no scope influence.""" + return checkpointable._add_variable_with_custom_getter( # pylint: disable=protected-access + name=name, shape=shape, dtype=dtype, + initializer=initializer, getter=_default_getter) + + +def _breadth_first_checkpointable_traversal(root_checkpointable): + """Find shortest paths to all variables owned by dependencies of root.""" + bfs_sorted = [] + to_visit = collections.deque([root_checkpointable]) + path_to_root = {root_checkpointable: ()} + while to_visit: + current_checkpointable = to_visit.popleft() + current_checkpointable._maybe_initialize_checkpointable() # pylint: disable=protected-access + 
bfs_sorted.append(current_checkpointable) + for child_checkpointable in ( + current_checkpointable._checkpoint_dependencies): # pylint: disable=protected-access + if child_checkpointable.ref not in path_to_root: + path_to_root[child_checkpointable.ref] = ( + path_to_root[current_checkpointable] + (child_checkpointable,)) + to_visit.append(child_checkpointable.ref) + return bfs_sorted, path_to_root + + +def _escape_local_name(name): + # We need to support slashes in local names for compatibility, since this + # naming scheme is being patched in to things like Layer.add_variable where + # slashes were previously accepted. We also want to use slashes to indicate + # edges traversed to reach the variable, so we escape forward slashes in + # names. + return (name.replace(_ESCAPE_CHAR, _ESCAPE_CHAR + _ESCAPE_CHAR) + .replace(r"/", _ESCAPE_CHAR + "S")) + + +def _object_prefix_from_path(path_to_root): + return "/".join( + (_escape_local_name(checkpointable.name) + for checkpointable in path_to_root)) + + +def _slot_variable_naming_for_optimizer(optimizer_path): + """Make a function for naming slot variables in an optimizer.""" + # Name slot variables: + # + # /<_OPTIMIZER_SLOTS_NAME>// + # + # where is exactly the checkpoint name used for the original + # variable, including the path from the checkpoint root and the local name in + # the object which owns it. Note that we only save slot variables if the + # variable it's slotting for is also being saved. 
+ + optimizer_identifier = "/%s/%s/" % (_OPTIMIZER_SLOTS_NAME, optimizer_path) + + def _name_slot_variable(variable_path, slot_name): + """With an optimizer specified, name a slot variable.""" + return (variable_path + + optimizer_identifier + + _escape_local_name(slot_name)) + + return _name_slot_variable + + +def _serialize_slot_variables(checkpointable_objects, node_ids, object_names): + """Gather and name slot variables.""" + non_slot_objects = list(checkpointable_objects) + slot_variables = {} + for checkpointable in non_slot_objects: + if isinstance(checkpointable, optimizer_lib.Optimizer): + naming_scheme = _slot_variable_naming_for_optimizer( + optimizer_path=object_names[checkpointable]) + slot_names = checkpointable.get_slot_names() + for slot_name in slot_names: + for original_variable_node_id, original_variable in enumerate( + non_slot_objects): + try: + slot_variable = checkpointable.get_slot( + original_variable, slot_name) + except AttributeError: + slot_variable = None + if slot_variable is None: + continue + slot_variable._maybe_initialize_checkpointable() # pylint: disable=protected-access + if slot_variable._checkpoint_dependencies: # pylint: disable=protected-access + # TODO(allenl): Gather dependencies of slot variables. + raise NotImplementedError( + "Currently only variables with no dependencies can be saved as " + "slot variables. File a feature request if this limitation " + "bothers you.") + if slot_variable in node_ids: + raise NotImplementedError( + "A slot variable was re-used as a dependency of a " + "Checkpointable object. This is not currently allowed. 
File a " + "feature request if this limitation bothers you.") + checkpoint_name = naming_scheme( + variable_path=object_names[original_variable], + slot_name=slot_name) + object_names[slot_variable] = checkpoint_name + slot_variable_node_id = len(checkpointable_objects) + node_ids[slot_variable] = slot_variable_node_id + checkpointable_objects.append(slot_variable) + slot_variable_proto = ( + checkpointable_object_graph_pb2.CheckpointableObjectGraph + .Object.SlotVariableReference( + slot_name=slot_name, + original_variable_node_id=original_variable_node_id, + slot_variable_node_id=slot_variable_node_id)) + slot_variables.setdefault(checkpointable, []).append( + slot_variable_proto) + return slot_variables + + +def _serialize_checkpointables( + checkpointable_objects, node_ids, object_names, slot_variables): + """Name non-slot `Checkpointable`s and add them to `object_graph_proto`.""" + object_graph_proto = ( + checkpointable_object_graph_pb2.CheckpointableObjectGraph()) + named_saveables = {} + + for checkpoint_id, checkpointable in enumerate(checkpointable_objects): + assert node_ids[checkpointable] == checkpoint_id + object_proto = object_graph_proto.nodes.add() + object_proto.slot_variables.extend(slot_variables.get(checkpointable, ())) + object_name = object_names[checkpointable] + for name, saveable in ( + checkpointable._gather_tensors_for_checkpoint().items()): # pylint: disable=protected-access + attribute = object_proto.attributes.add() + attribute.name = name + attribute.checkpoint_key = "%s/%s/%s" % ( + object_name, _OBJECT_ATTRIBUTES_NAME, _escape_local_name(name)) + # Figure out the name-based Saver's name for this variable. 
+ saver_dict = saver_lib.BaseSaverBuilder.OpListToDict( + [saveable], convert_variable_to_tensor=False) + attribute.full_name, = saver_dict.keys() + named_saveables[attribute.checkpoint_key] = saveable + + for child in checkpointable._checkpoint_dependencies: # pylint: disable=protected-access + child_proto = object_proto.children.add() + child_proto.node_id = node_ids[child.ref] + child_proto.local_name = child.name + + return named_saveables, object_graph_proto + + +def _serialize_object_graph(root_checkpointable): + """Determine checkpoint keys for variables and build a serialized graph. + + Non-slot variables are keyed based on a shortest path from the root saveable + to the object which owns the variable (i.e. the one which called + `Checkpointable._add_variable` to create it). + + Slot variables are keyed based on a shortest path to the variable being + slotted for, a shortest path to their optimizer, and the slot name. + + Args: + root_checkpointable: A `Checkpointable` object whose variables (including + the variables of dependencies, recursively) should be saved. + + Returns: + A tuple of (named_variables, object_graph_proto): + named_variables: A dictionary mapping names to variable objects. + object_graph_proto: A CheckpointableObjectGraph protocol buffer containing + the serialized object graph and variable references. + + Raises: + ValueError: If there are invalid characters in an optimizer's slot names. 
+ """ + checkpointable_objects, path_to_root = ( + _breadth_first_checkpointable_traversal(root_checkpointable)) + object_names = { + obj: _object_prefix_from_path(path) + for obj, path in path_to_root.items()} + node_ids = {node: node_id for node_id, node + in enumerate(checkpointable_objects)} + slot_variables = _serialize_slot_variables( + checkpointable_objects=checkpointable_objects, + node_ids=node_ids, + object_names=object_names) + return _serialize_checkpointables( + checkpointable_objects=checkpointable_objects, + node_ids=node_ids, + object_names=object_names, + slot_variables=slot_variables) + + +class _NoRestoreSaveable(saver_lib.BaseSaverBuilder.SaveableObject): + + def __init__(self, tensor, name): + spec = saver_lib.BaseSaverBuilder.SaveSpec(tensor, "", name) + super(_NoRestoreSaveable, self).__init__(tensor, [spec], name) + + def restore(self, restored_tensors, restored_shapes): + return control_flow_ops.no_op() + + +def save(file_prefix, root_checkpointable, checkpoint_number=None, + session=None): + """Save a training checkpoint. + + Args: + file_prefix: A prefix to use for the checkpoint filenames + (/path/to/directory/and_a_prefix). Names are generated based on this + prefix and the global step, if provided. + root_checkpointable: A Checkpointable object to save. The checkpoint + includes variables created by this object and any Checkpointable objects + it depends on. + checkpoint_number: An integer variable or Tensor, used to number + checkpoints. Typically this value is saved along with other variables in + training checkpoints, which will happen automatically if it was created by + `root_checkpointable` or one of its dependencies (via + `Checkpointable._add_variable`). + session: The session to evaluate variables in. Ignored when executing + eagerly. If not provided when graph building, the default session is used. + + Returns: + The full path to the checkpoint. 
+ """ + named_variables, serialized_graph = _serialize_object_graph( + root_checkpointable) + if context.in_graph_mode(): + if session is None: + session = ops.get_default_session() + else: + session = None + assert _OBJECT_GRAPH_PROTO_KEY not in named_variables + # TODO(allenl): Feed rather than embedding a constant. + named_variables[_OBJECT_GRAPH_PROTO_KEY] = _NoRestoreSaveable( + tensor=constant_op.constant( + serialized_graph.SerializeToString(), dtype=dtypes.string), + name=_OBJECT_GRAPH_PROTO_KEY) + with ops.device("/device:CPU:0"): + save_path = saver_lib.Saver(var_list=named_variables).save( + sess=session, + save_path=file_prefix, + write_meta_graph=False, + global_step=checkpoint_number) + return save_path + + +class CheckpointLoadStatus(object): + + def __init__(self, checkpoint): + self._checkpoint = checkpoint + + def assert_consumed(self): + """Asserts that all objects in the checkpoint have been created/matched.""" + for node_id, node in enumerate(self._checkpoint.object_graph_proto.nodes): + checkpointable = self._checkpoint.object_by_proto_id.get(node_id, None) + if checkpointable is None: + raise AssertionError("Unresolved object in checkpoint: %s" % (node)) + if checkpointable._update_uid < self._checkpoint.restore_uid: # pylint: disable=protected-access + raise AssertionError( + "Object not assigned a value from checkpoint: %s" % (node)) + return self + + +def restore(save_path, root_checkpointable, session=None): + """Restore a training checkpoint. + + Restores the values of variables created with `Checkpointable._add_variable` + in `root_checkpointable` and any objects that it tracks (transitive). Either + assigns values immediately if variables to restore have been created already, + or defers restoration until the variables are created. Dependencies added to + `root_checkpointable` after this call will be matched if they have a + corresponding object in the checkpoint. 
+ + When building a graph, restorations are executed in the default session if + `session` is `None`. Variable initializers read checkpointed values. + + To disallow deferred loading, assert immediately that all checkpointed + variables have been matched to variable objects: + + ```python + restore(path, root).assert_consumed() + ``` + + An exception will be raised unless every object was matched and its variables + already exist. + + Args: + save_path: The path to the checkpoint, as returned by `save` or + `tf.train.latest_checkpoint`. If None (as when there is no latest + checkpoint for `tf.train.latest_checkpoint` to return), does nothing. + root_checkpointable: The root of the object graph to restore. Variables to + restore need not have been created yet, but all dependencies on other + Checkpointable objects should already be declared. Objects in the + dependency graph are matched to objects in the checkpointed graph, and + matching objects have their variables restored (or the checkpointed values + saved for eventual restoration when the variable is created). + session: The session to evaluate assignment ops in. Ignored when executing + eagerly. If not provided when graph building, the default session is used. + Returns: + A CheckpointLoadStatus object, which can be used to make assertions about + the status of checkpoint restoration. 
+ """ + if save_path is None: + return + if context.in_graph_mode(): + if session is None: + session = ops.get_default_session() + else: + session = None + object_graph_string, = io_ops.restore_v2( + prefix=save_path, + tensor_names=[_OBJECT_GRAPH_PROTO_KEY], + shape_and_slices=[""], + dtypes=[dtypes.string], + name="object_graph_proto_read") + if session is not None: + object_graph_string = session.run(object_graph_string) + else: + object_graph_string = object_graph_string.numpy() + object_graph_proto = ( + checkpointable_object_graph_pb2.CheckpointableObjectGraph()) + object_graph_proto.ParseFromString(object_graph_string) + checkpoint = core_checkpointable._Checkpoint( # pylint: disable=protected-access + object_graph_proto=object_graph_proto, + save_path=save_path, + session=session) + core_checkpointable._CheckpointPosition( # pylint: disable=protected-access + checkpoint=checkpoint, proto_id=0).restore(root_checkpointable) + return CheckpointLoadStatus(checkpoint) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py new file mode 100644 index 0000000000..1394f0cf0f --- /dev/null +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -0,0 +1,857 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools +import os +import unittest + +import six + +from tensorflow.contrib.eager.python import checkpointable_utils +from tensorflow.contrib.eager.python import network as network_lib +from tensorflow.python.eager import context +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.layers import base +from tensorflow.python.layers import core +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.training import adam +from tensorflow.python.training import checkpointable +from tensorflow.python.training import saver as core_saver +from tensorflow.python.training import training_util + + +class CheckpointableDenseLayer(core.Dense, checkpointable.Checkpointable): + + def __init__(self, *args, **kwargs): + checkpointable.Checkpointable.__init__(self) + core.Dense.__init__(self, *args, **kwargs) + + def add_variable(self, name, shape, **kwargs): + # Calls both Checkpointable._add_variable and Layer.add_variable. Eventually + # Layer.add_variable should inherit from Checkpointable and simply call + # super and then do post-processing. 
+ return checkpointable.Checkpointable._add_variable_with_custom_getter( + self, + name=name, + shape=shape, + getter=functools.partial(core.Dense.add_variable, self), + **kwargs) + + +# pylint: disable=not-callable +class CheckpointableNetwork(network_lib.Network, checkpointable.Checkpointable): + + def __setattr__(self, name, value): + if isinstance(value, base.Layer): + self.track_layer(value, name=name) + # Checkpointable is next in the method resolution order, so this will catch + # Checkpointable objects which aren't Layers. + super(CheckpointableNetwork, self).__setattr__(name, value) + + def track_layer(self, layer, name): + self._track_checkpointable(layer, name=name) + return super(CheckpointableNetwork, self).track_layer(layer) + + +class CheckpointableAdam(adam.AdamOptimizer, checkpointable.Checkpointable): + + # NOTE: Copied from Optimizer with modifications to use add_variable + # for non-slot variables. These contortions are necessary to maintain + # checkpoint compatibility with variable.name based saving. + # TODO(allenl): Make this cleaner. 
+ def _create_non_slot_variable(self, initial_value, name, colocate_with): + """Add an extra variable, not associated with a slot.""" + if context.in_graph_mode(): + graph = colocate_with.graph + else: + graph = None + + key = (name, graph) + v = self._non_slot_dict.get(key, None) + if v is None: + with ops.colocate_with(colocate_with): + def _variable_getter(name, shape, dtype, initializer): + del shape, dtype # not used, but there for compatibility + return variable_scope.variable( + name=name, initial_value=initializer, trainable=False) + + initial_value = ops.convert_to_tensor(initial_value) + v = self._add_variable_with_custom_getter( + name=name, + shape=initial_value.get_shape(), + initializer=initial_value, + getter=_variable_getter) + + self._non_slot_dict[key] = v + + return v + + +class NonLayerCheckpointable(checkpointable.Checkpointable): + + def __init__(self): + super(NonLayerCheckpointable, self).__init__() + self.a_variable = checkpointable_utils.add_variable( + self, name="a_variable", shape=[]) + + +class MyNetwork(CheckpointableNetwork): + """A concrete Network for testing.""" + + def __init__(self): + super(MyNetwork, self).__init__() + self._named_dense = CheckpointableDenseLayer(1, use_bias=True) + self._via_track_layer = self.track_layer( + CheckpointableDenseLayer(1, use_bias=False), name="via_track_layer") + # We can still track Checkpointables which aren't Layers. + self._non_layer = NonLayerCheckpointable() + + def call(self, values): + return self._via_track_layer(self._named_dense(values)) + + +class Checkpoint(checkpointable.Checkpointable): + """A utility class which groups `Checkpointable` objects.""" + + def __init__(self, **kwargs): + super(Checkpoint, self).__init__() + for k, v in sorted(kwargs.items(), key=lambda item: item[0]): + setattr(self, k, v) + self._save_counter = None + + @property + def save_counter(self): + """An integer variable which starts at zero and is incremented on save. + + Used to number checkpoints. 
+ + Returns: + The save counter variable. + """ + if self._save_counter is None: + # Initialized to 0 and incremented before saving. + self._save_counter = checkpointable_utils.add_variable( + self, name="save_counter", initializer=0, dtype=dtypes.int64) + return self._save_counter + + def save(self, file_prefix, session=None): + assign_op = self.save_counter.assign_add(1) + if context.in_graph_mode(): + if session is None: + session = ops.get_default_session() + session.run(assign_op) + return checkpointable_utils.save( + file_prefix=file_prefix, + root_checkpointable=self, + checkpoint_number=self.save_counter, + session=session) + + def restore(self, save_path): + return checkpointable_utils.restore( + save_path=save_path, + root_checkpointable=self) + + +class InterfaceTests(test.TestCase): + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testAddVariable(self): + obj = NonLayerCheckpointable() + with self.assertRaisesRegexp(ValueError, "do not specify shape"): + checkpointable_utils.add_variable( + obj, name="shape_specified_twice", shape=[], initializer=1) + constant_initializer = checkpointable_utils.add_variable( + obj, name="constant_initializer", initializer=1) + with variable_scope.variable_scope("some_variable_scope"): + ones_initializer = checkpointable_utils.add_variable( + obj, + name="ones_initializer", + shape=[2], + initializer=init_ops.ones_initializer(dtype=dtypes.float32)) + bare_initializer = checkpointable_utils.add_variable( + obj, + name="bare_initializer", + shape=[2, 2], + dtype=dtypes.float64, + initializer=init_ops.zeros_initializer) + + # Even in graph mode, there are no naming conflicts between objects, only + # naming conflicts within an object. + other_duplicate = resource_variable_ops.ResourceVariable( + name="duplicate", initial_value=1.) 
+ duplicate = checkpointable_utils.add_variable( + obj, name="duplicate", shape=[]) + with self.assertRaisesRegexp(ValueError, "'duplicate' already exists"): + checkpointable_utils.add_variable(obj, name="duplicate", shape=[]) + + if context.in_graph_mode(): + self.evaluate(variables.global_variables_initializer()) + self.assertEqual("constant_initializer:0", constant_initializer.name) + self.assertEqual(1, self.evaluate(constant_initializer)) + self.assertEqual("some_variable_scope/ones_initializer:0", + ones_initializer.name) + self.assertAllEqual([1, 1], self.evaluate(ones_initializer)) + self.assertAllEqual([[0., 0.], + [0., 0.]], self.evaluate(bare_initializer)) + self.assertEqual("a_variable:0", obj.a_variable.name) + self.assertEqual("duplicate:0", other_duplicate.name) + if context.in_graph_mode(): + # The .name attribute may be globally influenced, but the checkpoint name + # won't be (tested below). + self.assertEqual("duplicate_1:0", duplicate.name) + else: + # When executing eagerly, there's no uniquification of variable names. The + # checkpoint name will be the same. + self.assertEqual("duplicate:0", duplicate.name) + named_variables, _ = checkpointable_utils._serialize_object_graph(obj) + expected_checkpoint_names = ( + "a_variable/.ATTRIBUTES/VARIABLE_VALUE", + "bare_initializer/.ATTRIBUTES/VARIABLE_VALUE", + "constant_initializer/.ATTRIBUTES/VARIABLE_VALUE", + "duplicate/.ATTRIBUTES/VARIABLE_VALUE", + "ones_initializer/.ATTRIBUTES/VARIABLE_VALUE", + ) + six.assertCountEqual( + self, expected_checkpoint_names, named_variables.keys()) + + def testInitNotCalled(self): + + class NoInit(checkpointable.Checkpointable): + + def __init__(self): + pass + + # __init__ for Checkpointable will be called implicitly. 
+ checkpointable_utils.add_variable(NoInit(), "var", shape=[]) + + def testShapeDtype(self): + root = checkpointable.Checkpointable() + v1 = checkpointable_utils.add_variable( + root, name="v1", initializer=3., dtype=dtypes.float64) + self.assertEqual(dtypes.float64, v1.dtype) + v2 = checkpointable_utils.add_variable( + root, + name="v2", + shape=[3], + initializer=init_ops.ones_initializer, + dtype=dtypes.float64) + self.assertEqual(dtypes.float64, v2.dtype) + self.assertAllEqual([1., 1., 1.], self.evaluate(v2)) + + +class CheckpointingTests(test.TestCase): + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testNamingWithOptimizer(self): + input_value = constant_op.constant([[3.]]) + network = MyNetwork() + # A nuisance Network using the same optimizer. Its slot variables should not + # go in the checkpoint, since it is never depended on. + other_network = MyNetwork() + optimizer = CheckpointableAdam(0.001) + optimizer_step = training_util.get_or_create_global_step() + root_checkpointable = Checkpoint( + optimizer=optimizer, network=network, optimizer_step=optimizer_step) + if context.in_eager_mode(): + optimizer.minimize( + lambda: network(input_value), + global_step=optimizer_step) + optimizer.minimize( + lambda: other_network(input_value), + global_step=optimizer_step) + else: + train_op = optimizer.minimize( + network(input_value), global_step=optimizer_step) + optimizer.minimize( + other_network(input_value), + global_step=optimizer_step) + self.evaluate(variables.global_variables_initializer()) + self.evaluate(train_op) + named_variables, serialized_graph = ( + checkpointable_utils._serialize_object_graph(root_checkpointable)) + expected_checkpoint_names = ( + # Created in the root node, so no prefix. + "optimizer_step", + # No name provided to track_checkpointable(), so the position is used + # instead (one-based). 
+ "network/via_track_layer/kernel", + # track_checkpointable() with a name provided, so that's used + "network/_named_dense/kernel", + "network/_named_dense/bias", + # non-Layer dependency of the network + "network/_non_layer/a_variable", + # The optimizer creates two non-slot variables + "optimizer/beta1_power", + "optimizer/beta2_power", + # Slot variables + "network/via_track_layer/kernel/.OPTIMIZER_SLOT/optimizer/m", + "network/via_track_layer/kernel/.OPTIMIZER_SLOT/optimizer/v", + "network/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", + "network/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", + "network/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", + "network/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", + ) + suffix = "/.ATTRIBUTES/VARIABLE_VALUE" + expected_checkpoint_names = [ + name + suffix for name in expected_checkpoint_names] + six.assertCountEqual(self, expected_checkpoint_names, + named_variables.keys()) + # Check that we've mapped to the right variable objects (not exhaustive) + self.assertEqual( + "global_step:0", + named_variables["optimizer_step" + suffix].name) + self.assertEqual( + "my_network/checkpointable_dense_layer_1/kernel:0", + named_variables["network/via_track_layer/kernel" + suffix].name) + self.assertEqual( + "my_network/checkpointable_dense_layer/kernel:0", + named_variables["network/_named_dense/kernel" + suffix].name) + self.assertEqual( + "beta1_power:0", + named_variables["optimizer/beta1_power" + suffix].name) + self.assertEqual( + "beta2_power:0", + named_variables["optimizer/beta2_power" + suffix].name) + # Spot check the generated protocol buffers. 
+ self.assertEqual("optimizer", + serialized_graph.nodes[0].children[1].local_name) + optimizer_node = serialized_graph.nodes[serialized_graph.nodes[0].children[ + 1].node_id] + self.assertEqual("beta1_power", + optimizer_node.children[0].local_name) + self.assertEqual("beta1_power", + serialized_graph.nodes[optimizer_node.children[0].node_id] + .attributes[0].full_name) + self.assertEqual( + "my_network/checkpointable_dense_layer/kernel", + serialized_graph.nodes[optimizer_node.slot_variables[0] + .original_variable_node_id] + .attributes[0].full_name) + # We strip off the :0 suffix, as variable.name-based saving does. + self.assertEqual( + "my_network/checkpointable_dense_layer/kernel/Adam", + serialized_graph.nodes[optimizer_node.slot_variables[0] + .slot_variable_node_id] + .attributes[0].full_name) + self.assertEqual( + "my_network/checkpointable_dense_layer/kernel/Adam:0", + optimizer.get_slot( + var=named_variables["network/_named_dense/kernel" + suffix], + name="m").name) + self.assertEqual( + "network/_named_dense/kernel" + suffix, + serialized_graph.nodes[ + optimizer_node.slot_variables[0] + .original_variable_node_id].attributes[0].checkpoint_key) + self.assertEqual("m", optimizer_node.slot_variables[0].slot_name) + self.assertEqual( + "network/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix, + serialized_graph.nodes[ + optimizer_node.slot_variables[0] + .slot_variable_node_id].attributes[0].checkpoint_key) + + @test_util.run_in_graph_and_eager_modes() + def testSaveRestore(self): + network = MyNetwork() + optimizer = CheckpointableAdam(0.001) + root_checkpointable = Checkpoint(optimizer=optimizer, network=network) + input_value = constant_op.constant([[3.]]) + if context.in_eager_mode(): + optimizer.minimize( + lambda: network(input_value)) + else: + train_op = optimizer.minimize(network(input_value)) + # TODO(allenl): Make initialization more pleasant when graph building. 
+ root_checkpointable.save_counter # pylint: disable=pointless-statement + self.evaluate(variables.global_variables_initializer()) + self.evaluate(train_op) + prefix = os.path.join(self.get_temp_dir(), "ckpt") + self.evaluate(state_ops.assign(network._named_dense.variables[1], [42.])) + m_bias_slot = optimizer.get_slot(network._named_dense.variables[1], "m") + self.evaluate(state_ops.assign(m_bias_slot, [1.5])) + save_path = root_checkpointable.save(file_prefix=prefix) + self.evaluate(state_ops.assign(network._named_dense.variables[1], [43.])) + self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3)) + optimizer_variables = self.evaluate(optimizer.variables()) + self.evaluate(state_ops.assign(m_bias_slot, [-2.])) + # Immediate restoration + root_checkpointable.restore(save_path=save_path).assert_consumed() + self.assertAllEqual([42.], self.evaluate(network._named_dense.variables[1])) + self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter)) + self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) + with ops.Graph().as_default(): + on_create_network = MyNetwork() + on_create_optimizer = CheckpointableAdam(0.001) + on_create_root = Checkpoint( + optimizer=on_create_optimizer, network=on_create_network) + with self.test_session(graph=ops.get_default_graph()): + # Deferred restoration + status = on_create_root.restore(save_path=save_path) + on_create_network(constant_op.constant([[3.]])) # create variables + self.assertAllEqual(1, self.evaluate(on_create_root.save_counter)) + self.assertAllEqual([42.], + self.evaluate( + on_create_network._named_dense.variables[1])) + on_create_m_bias_slot = on_create_optimizer.get_slot( + on_create_network._named_dense.variables[1], "m") + # Optimizer slot variables are created when the original variable is + # restored. 
+ self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) + self.assertAllEqual(optimizer_variables[2:], + self.evaluate(on_create_optimizer.variables())) + on_create_optimizer._create_slots( + [resource_variable_ops.ResourceVariable([1.])]) + status.assert_consumed() + beta1_power, beta2_power = on_create_optimizer._get_beta_accumulators() + self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power)) + self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power)) + + def testDeferredRestorationUsageEager(self): + """An idiomatic eager execution example.""" + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + network = MyNetwork() + optimizer = CheckpointableAdam(0.001) + root = Checkpoint( + optimizer=optimizer, network=network, + optimizer_step=training_util.get_or_create_global_step()) + root.restore(core_saver.latest_checkpoint(checkpoint_directory)) + for _ in range(num_training_steps): + # TODO(allenl): Use a Dataset and serialize/checkpoint it. 
+ input_value = constant_op.constant([[3.]]) + optimizer.minimize( + lambda: network(input_value), # pylint: disable=cell-var-from-loop + global_step=root.optimizer_step) + root.save(file_prefix=checkpoint_prefix) + self.assertEqual((training_continuation + 1) * num_training_steps, + root.optimizer_step.numpy()) + + def testUsageGraph(self): + """Expected usage when graph building.""" + with context.graph_mode(): + num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + with ops.Graph().as_default(): + network = MyNetwork() + optimizer = CheckpointableAdam(0.001) + root = Checkpoint( + optimizer=optimizer, network=network, + global_step=training_util.get_or_create_global_step()) + input_value = constant_op.constant([[3.]]) + train_op = optimizer.minimize( + network(input_value), + global_step=root.global_step) + root.save_counter # pylint: disable=pointless-statement + init_op = variables.global_variables_initializer() + checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) + with self.test_session(graph=ops.get_default_graph()) as session: + if checkpoint_path is None: + self.assertEqual(0, training_continuation) + session.run(init_op) + # Another alternative would be to run initializers automatically + # if no checkpoint is being loaded. This would make deferred + # loading a bit more useful with graph execution. 
+ else: + checkpointable_utils.restore( + save_path=checkpoint_path, + root_checkpointable=root, + session=session) + for _ in range(num_training_steps): + session.run(train_op) + root.save(file_prefix=checkpoint_prefix, + session=session) + self.assertEqual((training_continuation + 1) * num_training_steps, + session.run(root.global_step)) + self.assertEqual(training_continuation + 1, + session.run(root.save_counter)) + + def _get_checkpoint_name(self, name): + root = checkpointable.Checkpointable() + checkpointable_utils.add_variable( + root, name=name, shape=[1, 2], dtype=dtypes.float64) + named_variables, _ = checkpointable_utils._serialize_object_graph(root) + checkpoint_name, = named_variables.keys() + with ops.name_scope("root/" + checkpoint_name): + pass # Make sure we can use this as an op name if we prefix it. + return checkpoint_name + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testVariableNameEscaping(self): + suffix = "/.ATTRIBUTES/VARIABLE_VALUE" + self.assertEqual(r"a.Sb.Sc" + suffix, self._get_checkpoint_name(r"a/b/c")) + self.assertEqual(r"b" + suffix, self._get_checkpoint_name(r"b")) + self.assertEqual(r"c.S" + suffix, self._get_checkpoint_name(r"c/")) + self.assertEqual(r"d.S..S" + suffix, self._get_checkpoint_name(r"d/.S")) + self.assertEqual(r"d.S..ATTRIBUTES.Sf" + suffix, + self._get_checkpoint_name(r"d/.ATTRIBUTES/f")) + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testNumberedPath(self): + root = checkpointable.Checkpointable() + leaf = checkpointable.Checkpointable() + root.leaf = leaf + checkpointable_utils.add_variable(leaf, name="v", shape=[]) + named_variables, _ = checkpointable_utils._serialize_object_graph(root) + variable_name, = named_variables.keys() + self.assertEqual(r"leaf/v/.ATTRIBUTES/VARIABLE_VALUE", variable_name) + + @test_util.run_in_graph_and_eager_modes() + def testLocalNameValidation(self): + root = checkpointable.Checkpointable() + leaf = 
checkpointable.Checkpointable() + # Dots are escaped, which avoids conflicts with reserved names. + root._track_checkpointable(leaf, name=".ATTRIBUTES") + checkpointable_utils.add_variable(checkpointable=leaf, name="a", shape=[]) + named_variables, _ = checkpointable_utils._serialize_object_graph(root) + name, = named_variables.keys() + self.assertEqual(name, "..ATTRIBUTES/a/.ATTRIBUTES/VARIABLE_VALUE") + + @test_util.run_in_graph_and_eager_modes() + def testLateDependencyTracking(self): + + class Dependency(checkpointable.Checkpointable): + + def build(self): + self.var = checkpointable_utils.add_variable( + self, "var", initializer=0.) + + class LateDependencies(checkpointable.Checkpointable): + + def add_dep(self): + self.dep = Dependency() + self.dep.build() + + original = LateDependencies() + original.add_dep() + self.evaluate(state_ops.assign(original.dep.var, 123.)) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_path = checkpointable_utils.save(checkpoint_prefix, original) + load_into = LateDependencies() + status = checkpointable_utils.restore(save_path, load_into) + with self.assertRaises(AssertionError): + status.assert_consumed() + load_into.add_dep() + status.assert_consumed() + self.assertEqual(123., self.evaluate(load_into.dep.var)) + + @test_util.run_in_graph_and_eager_modes() + def testDepAfterVar(self): + + class Dependency(checkpointable.Checkpointable): + + def build(self): + self.var = checkpointable_utils.add_variable( + self, "var", initializer=0.) 
+ + class DepAfterVar(checkpointable.Checkpointable): + + def add_dep(self): + dep = Dependency() + dep.build() + self.dep = dep + + dep_after_var = DepAfterVar() + dep_after_var.add_dep() + self.evaluate(state_ops.assign(dep_after_var.dep.var, -14.)) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_path = checkpointable_utils.save( + checkpoint_prefix, dep_after_var) + + loaded_dep_after_var = DepAfterVar() + status = checkpointable_utils.restore( + save_path, loaded_dep_after_var) + loaded_dep_after_var.add_dep() + status.assert_consumed() + self.assertEqual(-14., self.evaluate(loaded_dep_after_var.dep.var)) + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testDeferredSlotRestoration(self): + checkpoint_directory = self.get_temp_dir() + + root = checkpointable.Checkpointable() + root.var = checkpointable_utils.add_variable( + root, name="var", initializer=0.) + optimizer = CheckpointableAdam(0.1) + if context.in_graph_mode(): + train_op = optimizer.minimize(root.var) + self.evaluate(variables.global_variables_initializer()) + self.evaluate(train_op) + else: + optimizer.minimize(root.var.read_value) + self.evaluate(state_ops.assign(root.var, 12.)) + no_slots_path = checkpointable_utils.save( + os.path.join(checkpoint_directory, "no_slots"), root) + root.optimizer = optimizer + self.evaluate(state_ops.assign(root.var, 13.)) + self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var), + 14.)) + slots_path = checkpointable_utils.save( + os.path.join(checkpoint_directory, "with_slots"), root) + new_root = checkpointable.Checkpointable() + # Load the slot-containing checkpoint (deferred), then immediately overwrite + # the non-slot variable (also deferred). 
+ slot_status = checkpointable_utils.restore( + slots_path, new_root) + no_slot_status = checkpointable_utils.restore( + no_slots_path, new_root) + with self.assertRaises(AssertionError): + no_slot_status.assert_consumed() + new_root.var = checkpointable_utils.add_variable( + new_root, name="var", shape=[]) + self.assertEqual(12., self.evaluate(new_root.var)) + no_slot_status.assert_consumed() + new_root.optimizer = CheckpointableAdam(0.1) + with self.assertRaisesRegexp(AssertionError, "beta1_power"): + slot_status.assert_consumed() + self.assertEqual(12., self.evaluate(new_root.var)) + self.assertEqual(14., self.evaluate( + new_root.optimizer.get_slot(name="m", var=new_root.var))) + if context.in_graph_mode(): + train_op = new_root.optimizer.minimize(new_root.var) + self.evaluate(train_op) + else: + new_root.optimizer.minimize(new_root.var.read_value) + slot_status.assert_consumed() + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testOverlappingRestores(self): + checkpoint_directory = self.get_temp_dir() + save_root = checkpointable.Checkpointable() + save_root.dep = checkpointable.Checkpointable() + save_root.dep.var = checkpointable_utils.add_variable( + save_root.dep, name="var", initializer=0.) 
+ self.evaluate(state_ops.assign(save_root.dep.var, 12.)) + first_path = checkpointable_utils.save( + os.path.join(checkpoint_directory, "first"), save_root) + self.evaluate(state_ops.assign(save_root.dep.var, 13.)) + second_path = checkpointable_utils.save( + os.path.join(checkpoint_directory, "second"), save_root) + + first_root = checkpointable.Checkpointable() + second_root = checkpointable.Checkpointable() + first_status = checkpointable_utils.restore( + first_path, first_root) + second_status = checkpointable_utils.restore( + second_path, second_root) + load_dep = checkpointable.Checkpointable() + load_dep.var = checkpointable_utils.add_variable( + load_dep, name="var", shape=[]) + first_root.dep = load_dep + first_status.assert_consumed() + self.assertEqual(12., self.evaluate(load_dep.var)) + second_root.dep = load_dep + second_status.assert_consumed() + self.assertEqual(13., self.evaluate(load_dep.var)) + + # Try again with the order of the restore() reversed. The last restore + # determines the final value. 
+ first_root = checkpointable.Checkpointable() + second_root = checkpointable.Checkpointable() + second_status = checkpointable_utils.restore( + second_path, second_root) + first_status = checkpointable_utils.restore( + first_path, first_root) + load_dep = checkpointable.Checkpointable() + load_dep.var = checkpointable_utils.add_variable( + load_dep, name="var", shape=[]) + first_root.dep = load_dep + first_status.assert_consumed() + self.assertEqual(12., self.evaluate(load_dep.var)) + second_root.dep = load_dep + second_status.assert_consumed() + self.assertEqual(12., self.evaluate(load_dep.var)) + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testAmbiguousLoad(self): + # Not OK to split one checkpoint object into two + checkpoint_directory = self.get_temp_dir() + save_root = checkpointable.Checkpointable() + save_root.dep_one = checkpointable.Checkpointable() + save_root.dep_two = checkpointable.Checkpointable() + dep_three = checkpointable.Checkpointable() + save_root.dep_one.dep_three = dep_three + save_root.dep_two.dep_three = dep_three + checkpointable_utils.add_variable(dep_three, name="var", initializer=0.) 
+ self.evaluate(variables.global_variables_initializer()) + save_path = checkpointable_utils.save( + os.path.join(checkpoint_directory, "ckpt"), save_root) + load_root = checkpointable.Checkpointable() + checkpointable_utils.restore(save_path, load_root) + load_root.dep_one = checkpointable.Checkpointable() + load_root.dep_two = checkpointable.Checkpointable() + load_root.dep_one.dep_three = checkpointable.Checkpointable() + with self.assertRaisesRegexp(AssertionError, + "resolved to different objects"): + load_root.dep_two.dep_three = checkpointable.Checkpointable() + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def testObjectsCombined(self): + # Currently fine to load two checkpoint objects into one Python object + checkpoint_directory = self.get_temp_dir() + save_root = checkpointable.Checkpointable() + save_root.dep_one = checkpointable.Checkpointable() + save_root.dep_two = checkpointable.Checkpointable() + checkpointable_utils.add_variable( + save_root.dep_one, name="var1", initializer=32., dtype=dtypes.float64) + checkpointable_utils.add_variable( + save_root.dep_two, name="var2", initializer=64., dtype=dtypes.float64) + self.evaluate(variables.global_variables_initializer()) + save_path = checkpointable_utils.save( + os.path.join(checkpoint_directory, "ckpt"), save_root) + load_root = checkpointable.Checkpointable() + load_root.dep_one = checkpointable.Checkpointable() + load_root.dep_two = load_root.dep_one + v1 = checkpointable_utils.add_variable( + load_root.dep_one, name="var1", shape=[], dtype=dtypes.float64) + v2 = checkpointable_utils.add_variable( + load_root.dep_one, name="var2", shape=[], dtype=dtypes.float64) + checkpointable_utils.restore(save_path, load_root).assert_consumed() + self.assertEqual(32., self.evaluate(v1)) + self.assertEqual(64., self.evaluate(v2)) + + @test_util.run_in_graph_and_eager_modes() + def testDependencyLoop(self): + # Note: this test creates garbage during eager execution because it + # 
purposefully creates a reference cycle. + first = checkpointable.Checkpointable() + second = checkpointable.Checkpointable() + first.second = second + second.first = first + first.v = checkpointable_utils.add_variable( + first, "v1", initializer=[3., 1., 4.]) + second.v = checkpointable_utils.add_variable( + second, "v2", initializer=[1., 1., 2., 3.]) + self.evaluate(variables.global_variables_initializer()) + checkpoint_directory = self.get_temp_dir() + save_path = checkpointable_utils.save( + os.path.join(checkpoint_directory, "ckpt"), first) + + # Test deferred loading + first_load = checkpointable.Checkpointable() + status = checkpointable_utils.restore(save_path, first_load) + second_load = checkpointable.Checkpointable() + first_load.second = second_load + second_load.first = first_load + with self.assertRaises(AssertionError): + status.assert_consumed() + first_load.v = checkpointable_utils.add_variable( + first_load, "v1", shape=[3]) + second_load.v = checkpointable_utils.add_variable( + second_load, "v2", shape=[4]) + status.assert_consumed() + self.assertAllEqual([3., 1., 4.], self.evaluate(first_load.v)) + self.assertAllEqual([1., 1., 2., 3.], self.evaluate(second_load.v)) + + # Test loading when variables have already been created + self.evaluate(first_load.v.assign([2., 7., 1.])) + self.assertAllEqual([2., 7., 1.], self.evaluate(first_load.v)) + self.evaluate(second_load.v.assign([2., 7., 1., 8.])) + self.assertAllEqual([2., 7., 1., 8.], self.evaluate(second_load.v)) + checkpointable_utils.restore( + save_path, first_load).assert_consumed() + self.assertAllEqual([3., 1., 4.], self.evaluate(first_load.v)) + self.assertAllEqual([1., 1., 2., 3.], self.evaluate(second_load.v)) + + @test_util.run_in_graph_and_eager_modes() + def testRestoreOnAssign(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_graph = ops.Graph() + with save_graph.as_default(), self.test_session(save_graph): + 
first = checkpointable.Checkpointable() + first.var1 = variable_scope.get_variable( + name="outside_var", initializer=0.) + first.var2 = variable_scope.get_variable( + name="blah", initializer=0.) + self.evaluate(first.var1.assign(4.)) + self.evaluate(first.var2.assign(8.)) + save_path = checkpointable_utils.save( + checkpoint_prefix, root_checkpointable=first) + restore_graph = ops.Graph() + with restore_graph.as_default(), self.test_session(restore_graph): + second = checkpointable.Checkpointable() + second.var2 = variable_scope.get_variable( + name="blah", initializer=0.) + checkpointable_utils.restore(save_path, root_checkpointable=second) + recreated_var1 = variable_scope.get_variable( + name="outside_var", initializer=0.) + self.assertEqual(8., self.evaluate(second.var2)) + self.evaluate(recreated_var1.assign(-2.)) + self.assertEqual(-2., self.evaluate(recreated_var1)) + second.var1 = recreated_var1 + self.assertEqual(4., self.evaluate(recreated_var1)) + + # TODO(allenl): Saver class that doesn't pollute the graph with constants. + @unittest.skip("todo") + def testManySavesGraph(self): + """Saves after the first should not modify the graph.""" + with context.graph_mode(): + graph = ops.Graph() + with graph.as_default(), self.test_session(graph): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + obj = checkpointable.Checkpointable() + obj.var = variable_scope.get_variable(name="v", initializer=0.) 
+ obj.opt = CheckpointableAdam(0.1) + obj.opt.minimize(obj.var.read_value()) + self.evaluate(variables.global_variables_initializer()) + checkpointable_utils.save( + checkpoint_prefix, root_checkpointable=obj) + before_ops = graph.get_operations() + checkpointable_utils.save( + checkpoint_prefix, root_checkpointable=obj) + self.assertEqual(before_ops, graph.get_operations()) + + @unittest.skip("todo") + def testManyRestoresGraph(self): + """Restores after the first should not modify the graph.""" + with context.graph_mode(): + graph = ops.Graph() + with graph.as_default(), self.test_session(graph): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + obj = checkpointable.Checkpointable() + obj.var = variable_scope.get_variable(name="v", initializer=0.) + obj.opt = CheckpointableAdam(0.1) + obj.opt.minimize(obj.var.read_value()) + self.evaluate(variables.global_variables_initializer()) + save_path = checkpointable_utils.save( + checkpoint_prefix, root_checkpointable=obj) + checkpointable_utils.restore( + save_path, root_checkpointable=obj) + before_ops = graph.get_operations() + checkpointable_utils.restore( + save_path, root_checkpointable=obj) + self.assertEqual(before_ops, graph.get_operations()) + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index f563d32388..cee7c47e00 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2518,6 +2518,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":array_ops", + ":checkpointable", ":control_flow_ops", ":dtypes", ":framework_ops", @@ -2851,6 +2852,30 @@ py_library( ], ) +py_library( + name = "checkpointable", + srcs = ["training/checkpointable.py"], + srcs_version = "PY2AND3", + deps = [ + ":dtypes", + ":io_ops_gen", + ":ops", + ":pywrap_tensorflow", + ":util", + "//tensorflow/python/eager:context", + ], +) + +py_test( + name = "checkpointable_test", + srcs = 
["training/checkpointable_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":checkpointable", + ":client_testlib", + ], +) + py_test( name = "evaluation_test", size = "small", diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 25cf5aca83..09d349fc2d 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -35,6 +35,7 @@ from tensorflow.python.ops import variables # pylint: disable=wildcard-import from tensorflow.python.ops.gen_resource_variable_ops import * # pylint: enable=wildcard-import +from tensorflow.python.training import checkpointable from tensorflow.python.util import compat @@ -348,6 +349,11 @@ class ResourceVariable(variables.Variable): if constraint is not None and not callable(constraint): raise ValueError("The `constraint` argument must be a callable.") + if isinstance(initial_value, checkpointable.CheckpointInitialValue): + self._maybe_initialize_checkpointable() + self._update_uid = initial_value.checkpoint_position.restore_uid + initial_value = initial_value.wrapped_value + self._trainable = trainable if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index 19e3298e40..125922e296 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -29,6 +29,7 @@ from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import checkpointable from tensorflow.python.util import compat from tensorflow.python.util import tf_should_use from tensorflow.python.util.deprecation import deprecated @@ -36,7 +37,7 @@ from tensorflow.python.util.tf_export import tf_export 
@tf_export("Variable") -class Variable(object): +class Variable(checkpointable.Checkpointable): """See the @{$variables$Variables How To} for a high level overview. A variable maintains state in the graph across calls to `run()`. You add a @@ -306,6 +307,11 @@ class Variable(object): if constraint is not None and not callable(constraint): raise ValueError("The `constraint` argument must be a callable.") + if isinstance(initial_value, checkpointable.CheckpointInitialValue): + self._maybe_initialize_checkpointable() + self._update_uid = initial_value.checkpoint_position.restore_uid + initial_value = initial_value.wrapped_value + if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] with ops.init_scope(): @@ -786,6 +792,20 @@ class Variable(object): setattr(Variable, operator, _run_op) + def _scatter_tensors_from_checkpoint(self, attributes): + """For implementing `Checkpointable`. Return an assignment op to run.""" + if (len(attributes) != 1 + or checkpointable.VARIABLE_VALUE_KEY not in attributes): + raise ValueError( + ("The variable %s was restored with unexpected values (expected one " + "with key %s, got %s)") % ( + self, checkpointable.VARIABLE_VALUE_KEY, attributes)) + return self.assign(attributes[checkpointable.VARIABLE_VALUE_KEY]) + + def _gather_tensors_for_checkpoint(self): + """For implementing `Checkpointable`. This object is saveable on its own.""" + return {checkpointable.VARIABLE_VALUE_KEY: self} + def _try_guard_against_uninitialized_dependencies(self, initial_value): """Attempt to guard against dependencies on uninitialized variables. diff --git a/tensorflow/python/training/checkpointable.py b/tensorflow/python/training/checkpointable.py new file mode 100644 index 0000000000..c2fea0f40d --- /dev/null +++ b/tensorflow/python/training/checkpointable.py @@ -0,0 +1,584 @@ +"""An object-local variable management scheme.""" +# Copyright 2017 The TensorFlow Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import weakref + +from tensorflow.python import pywrap_tensorflow +from tensorflow.python.eager import context +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import gen_io_ops as io_ops +from tensorflow.python.util import nest + +# A key indicating a variable's value in an object's checkpointed Tensors +# (Checkpointable._gather_tensors_for_checkpoint). If this is the only key and +# the object has no dependencies, then its value may be restored on object +# creation (avoiding double assignment when executing eagerly). +VARIABLE_VALUE_KEY = "VARIABLE_VALUE" + +_CheckpointableReference = collections.namedtuple( + "_CheckpointableReference", + [ + # The local name for this dependency. + "name", + # The Checkpointable object being referenced. + "ref" + ]) + + +class CheckpointInitialValue(ops.Tensor): + """Tensor wrapper for managing update UIDs in `Variables`. + + When supplied as an initial value, objects of this type let a `Variable` + (`Variable`, `ResourceVariable`, etc.) know the UID of the restore the initial + value came from. 
This allows deferred restorations to be sequenced in the + order the user specified them, and lets us fall back on assignment if an + initial value is not set (e.g. due to a custom getter interfering). + + See comments in _add_variable_with_custom_getter for more information about + how `CheckpointInitialValue` is used. + """ + + def __init__(self, checkpoint_position, shape=None): + self.wrapped_value = checkpoint_position.restore_ops()[ + VARIABLE_VALUE_KEY] + if shape: + # We need to set the static shape information on the initializer if + # possible so we don't get a variable with an unknown shape. + self.wrapped_value.set_shape(shape) + self._checkpoint_position = checkpoint_position + + @property + def __class__(self): + return (self.wrapped_value.__class__, CheckpointInitialValue) + + def __getattr__(self, attr): + try: + return getattr(self.wrapped_value, attr) + except AttributeError: + return self.__getattribute__(attr) + + @property + def checkpoint_position(self): + return self._checkpoint_position + + +class _CheckpointPosition(object): + """Indicates a position within a `_Checkpoint`.""" + + def __init__(self, checkpoint, proto_id): + """Specify an object within a checkpoint. + + Args: + checkpoint: A _Checkpoint object. + proto_id: The index of this object in CheckpointableObjectGraph.nodes. + """ + self._checkpoint = checkpoint + self._proto_id = proto_id + + def restore(self, checkpointable): + """Restore this value into `checkpointable`.""" + if self.bind_object(checkpointable): + # This object's correspondence with a checkpointed object is new, so + # process deferred restorations for it and its dependencies. + restore_ops = checkpointable._restore_from_checkpoint_position(self) # pylint: disable=protected-access + session = self._checkpoint.session + if session: + session.run(restore_ops) + + def bind_object(self, checkpointable): + """Set a checkpoint<->object correspondence and process slot variables. 
+ + Args: + checkpointable: The object to record a correspondence for. + Returns: + True if this is a new assignment, False if this object has already been + mapped to a checkpointed `Object` proto. + Raises: + AssertionError: If another object is already bound to the `Object` proto. + """ + checkpoint = self.checkpoint + current_assignment = checkpoint.object_by_proto_id.get(self._proto_id, None) + if current_assignment is None: + checkpoint.object_by_proto_id[self._proto_id] = checkpointable + for deferred_slot_restoration in ( + checkpoint.deferred_slot_restorations.pop(self._proto_id, ())): + checkpointable._process_slot_restoration( # pylint: disable=protected-access + slot_variable_position=_CheckpointPosition( + checkpoint=checkpoint, + proto_id=deferred_slot_restoration.slot_variable_id), + variable=deferred_slot_restoration.original_variable, + slot_name=deferred_slot_restoration.slot_name) + for slot_restoration in checkpoint.slot_restorations.get( + self._proto_id, ()): + optimizer_object = checkpoint.object_by_proto_id.get( + slot_restoration.optimizer_id, None) + if optimizer_object is None: + # The optimizer has not yet been created or tracked. Record in the + # checkpoint that the slot variables need to be restored when it is. 
+ checkpoint.deferred_slot_restorations.setdefault( + slot_restoration.optimizer_id, []).append( + _DeferredSlotVariableRestoration( + original_variable=checkpointable, + slot_variable_id=slot_restoration.slot_variable_id, + slot_name=slot_restoration.slot_name)) + else: + optimizer_object._process_slot_restoration( # pylint: disable=protected-access + slot_variable_position=_CheckpointPosition( + checkpoint=checkpoint, + proto_id=slot_restoration.slot_variable_id), + variable=checkpointable, + slot_name=slot_restoration.slot_name) + return True # New assignment + else: + # The object was already mapped for this checkpoint load, which means + # we don't need to do anything besides check that the mapping is + # consistent (if the dependency DAG is not a tree then there are + # multiple paths to the same object). + if current_assignment is not checkpointable: + raise AssertionError( + ("Unable to load the checkpoint into this object graph. Either " + "the Checkpointable object references in the Python program " + "have changed in an incompatible way, or the checkpoint was " + "generated in an incompatible program.\n\nTwo checkpoint " + "references resolved to different objects (%s and %s).") + % (current_assignment, checkpointable)) + return False # Not a new assignment + + def is_simple_variable(self): + """Determine whether this value is restorable with a Tensor initializer.""" + attributes = self.object_proto.attributes + return (len(attributes) == 1 + and attributes[0].name == VARIABLE_VALUE_KEY + and not self.object_proto.children) + + def restore_ops(self): + """Create restore ops for this object's attributes.""" + restore_tensors = {} + for serialized_tensor in self.object_proto.attributes: + checkpoint_key = serialized_tensor.checkpoint_key + dtype = self._checkpoint.dtype_map[checkpoint_key] + base_type = dtype.base_dtype + with ops.init_scope(): + restore, = io_ops.restore_v2( + prefix=self._checkpoint.save_path, + tensor_names=[checkpoint_key], + 
shape_and_slices=[""], + dtypes=[base_type], + name="%s_checkpoint_read" % (serialized_tensor.name,)) + restore_tensors[serialized_tensor.name] = restore + return restore_tensors + + @property + def checkpoint(self): + return self._checkpoint + + @property + def checkpointable(self): + return self._checkpoint.object_by_proto_id[self._proto_id] + + @property + def object_proto(self): + return self._checkpoint.object_graph_proto.nodes[self._proto_id] + + @property + def restore_uid(self): + return self._checkpoint.restore_uid + + def __repr__(self): + return repr(self.object_proto) + + +_DeferredSlotVariableRestoration = collections.namedtuple( + "_DeferredSlotVariableRestoration", + [ + "original_variable", + "slot_variable_id", + "slot_name", + ] +) + +_SlotVariableRestoration = collections.namedtuple( + "_SlotVariableRestoration", + [ + # The checkpoint proto id of the optimizer object. + "optimizer_id", + # The checkpoint proto id of the slot variable. + "slot_variable_id", + "slot_name", + ]) + + +class _Checkpoint(object): + """Holds the status of an object-based checkpoint load.""" + + def __init__(self, object_graph_proto, save_path, session): + """Specify the checkpoint being loaded. + + Args: + object_graph_proto: The CheckpointableObjectGraph protocol buffer + associated with this checkpoint. + save_path: The path to the checkpoint, as returned by + `tf.train.latest_checkpoint`. + session: The session to evaluate assignment ops in. Should be None if + executing eagerly. + + Raises: + ValueError: If `session` is not None and eager execution is enabled. + """ + self.object_graph_proto = object_graph_proto + self.restore_uid = ops.uid() + # Dictionary mapping from an id in the protocol buffer flat array to + # Checkpointable Python objects. This mapping may be deferred if a + # checkpoint is restored before all dependencies have been tracked. 
Uses + # weak references so that partial restorations don't create reference cycles + # (as objects with deferred dependencies will generally have references to + # this object). + self.object_by_proto_id = weakref.WeakValueDictionary() + self.save_path = save_path + reader = pywrap_tensorflow.NewCheckpointReader(save_path) + self.dtype_map = reader.get_variable_to_dtype_map() + # A mapping from optimizer proto ids to lists of slot variables to be + # restored when the optimizer is tracked. Only includes slot variables whose + # regular variables have already been created, and only for optimizer + # objects which have not yet been created/tracked. + self.deferred_slot_restorations = {} + # A mapping from variable proto ids to lists of slot variables to be + # restored when the variable is created/tracked. These get shifted over to + # deferred_slot_restorations if the optimizer hasn't been created when that + # happens. + self.slot_restorations = {} + for node_index, node in enumerate(self.object_graph_proto.nodes): + for slot_reference in node.slot_variables: + # `node` refers to an `Optimizer`, since only these have slot variables. + self.slot_restorations.setdefault( + slot_reference.original_variable_node_id, []).append( + _SlotVariableRestoration( + optimizer_id=node_index, + slot_variable_id=slot_reference.slot_variable_node_id, + slot_name=slot_reference.slot_name)) + if session is not None and context.in_eager_mode(): + raise ValueError( + "Passed a session %s when executing eagerly." % (session,)) + self.session = session + + +class Checkpointable(object): + """Manages dependencies on other objects. + + `Checkpointable` objects may have dependencies: other `Checkpointable` objects + which should be saved if the object declaring the dependency is saved. A + correctly saveable program has a dependency graph such that if changing a + global variable affects an object (e.g. 
changes the behavior of any of its + methods) then there is a chain of dependencies from the influenced object to + the variable. + + Dependency edges have names, and are created implicitly when a + `Checkpointable` object is assigned to an attribute of another + `Checkpointable` object. For example: + + ``` + obj = Checkpointable() + obj.v = ResourceVariable(0.) + ``` + + The `Checkpointable` object `obj` now has a dependency named "v" on a + variable. + + `Checkpointable` objects may specify `Tensor`s to be saved and restored + directly (e.g. a `Variable` indicating how to save itself) rather than through + dependencies on other objects. See + `Checkpointable._scatter_tensors_from_checkpoint` and + `Checkpointable._gather_tensors_for_checkpoint` for details. + """ + + def _maybe_initialize_checkpointable(self): + """Initialize dependency management. + + Not __init__, since most objects will forget to call it. + """ + if hasattr(self, "_checkpoint_dependencies"): + # __init__ already called. This check means that we don't need + # Checkpointable.__init__() in the constructor of every TensorFlow object. + return + # A list of _CheckpointableReference objects. + self._checkpoint_dependencies = [] + # Maps names -> Checkpointable objects + self._dependency_names = {} + # Restorations for other Checkpointable objects on which this object may + # eventually depend. + self._deferred_dependencies = {} # local name -> _CheckpointPosition list + # The UID of the highest assignment to this object. Used to ensure that the + # last requested assignment determines the final value of an object. 
+ if hasattr(self, "_update_uid"): + raise AssertionError( + "Internal error: the object had an update UID set before its " + "initialization code was run.") + self._update_uid = -1 + + def __setattr__(self, name, value): + """Support self.foo = checkpointable syntax.""" + # Perform the attribute assignment, and potentially call other __setattr__ + # overrides such as that for tf.keras.Model. + super(Checkpointable, self).__setattr__(name, value) + if isinstance(value, Checkpointable): + self._track_checkpointable( + value, name=name, + # Allow the user to switch the Checkpointable which is tracked by this + # name, since assigning a new variable to an attribute has + # historically been fine (e.g. Adam did this). + # TODO(allenl): Should this be a warning once Checkpointable save/load + # is usable? + overwrite=True) + + def _add_variable_with_custom_getter( + self, name, shape=None, dtype=dtypes.float32, + initializer=None, getter=None, **kwargs_for_getter): + """Restore-on-create for a variable to be saved with this `Checkpointable`. + + If the user has requested that this object or another `Checkpointable` which + depends on this object be restored from a checkpoint (deferred loading + before variable object creation), `initializer` may be ignored and the value + from the checkpoint used instead. + + Args: + name: A name for the variable. Must be unique within this object. + shape: The shape of the variable. + dtype: The data type of the variable. + + initializer: The initializer to use. Ignored if there is a deferred + restoration left over from a call to + `_restore_from_checkpoint_position`. + + getter: The getter to wrap which actually fetches the variable. + **kwargs_for_getter: Passed to the getter. + + Returns: + The new variable object. + + Raises: + ValueError: If the variable name is not unique. 
+ """ + self._maybe_initialize_checkpointable() + if name in self._dependency_names: + raise ValueError( + ("A variable named '%s' already exists in this Checkpointable, but " + "Checkpointable._add_variable called to create another with " + "that name. Variable names must be unique within a Checkpointable " + "object.") % (name,)) + # If this is a variable with a single Tensor stored in the checkpoint, we + # can set that value as an initializer rather than initializing and then + # assigning (when executing eagerly). + checkpoint_initializer = self._preload_simple_restoration( + name=name, shape=shape) + if (checkpoint_initializer is not None + and not (isinstance(initializer, CheckpointInitialValue) + and initializer.checkpoint_position.restore_uid + > checkpoint_initializer.checkpoint_position.restore_uid)): + # If multiple Checkpointable objects are "creating" the same variable via + # the magic of custom getters, the one with the highest restore UID (the + # one called last) has to make the final initializer. If another custom + # getter interrupts this process by overwriting the initializer, then + # we'll catch that when we call _track_checkpointable. So this is "best + # effort" to set the initializer with the highest restore UID. + initializer = checkpoint_initializer + shape = None + checkpoint_position = checkpoint_initializer.checkpoint_position + else: + checkpoint_position = None + + new_variable = getter( + name=name, shape=shape, dtype=dtype, initializer=initializer, + **kwargs_for_getter) + + if (checkpoint_position is not None + and hasattr(new_variable, "_update_uid") + and new_variable._update_uid == checkpoint_position.restore_uid): # pylint: disable=protected-access + session = checkpoint_position.checkpoint.session + if session: + session.run(new_variable.initializer) + # If we set an initializer and the variable processed it, tracking will not + # assign again. 
It will add this variable to our dependencies, and if there + # is a non-trivial restoration queued, it will handle that. This also + # handles slot variables. + return self._track_checkpointable(new_variable, name=name) + + def _preload_simple_restoration(self, name, shape): + """Return a dependency's value for restore-on-create. + + Note the restoration is not deleted; if for some reason preload is called + and then not assigned to the variable (for example because a custom getter + overrides the initializer), the assignment will still happen once the + variable is tracked (determined based on checkpoint.restore_uid). + + Args: + name: The object-local name of the dependency holding the variable's + value. + shape: The shape of the variable being loaded into. + Returns: + A `CheckpointInitialValue` for use as a variable's initializer or + initial_value, or None if one should not be set (either because there was + no variable with this name in the checkpoint or because it needs more + complex deserialization). Any non-trivial deserialization will happen when + the variable object is tracked. + """ + deferred_dependencies_list = self._deferred_dependencies.get(name, ()) + if not deferred_dependencies_list: + # Nothing to do; we don't have a restore for this dependency queued up. + return + for checkpoint_position in deferred_dependencies_list: + if not checkpoint_position.is_simple_variable(): + # If _any_ pending restoration is too complicated to fit in an + # initializer (because it has dependencies, or because there are + # multiple Tensors to restore), bail and let the general tracking code + # handle it. + return None + checkpoint_position = max( + deferred_dependencies_list, + key=lambda restore: restore.checkpoint.restore_uid) + return CheckpointInitialValue( + checkpoint_position=checkpoint_position, shape=shape) + + def _track_checkpointable(self, checkpointable, name, overwrite=False): + """Declare a dependency on another `Checkpointable` object. 
+ + Indicates that checkpoints for this object should include variables from + `checkpointable`. + + Variables in a checkpoint are mapped to `Checkpointable`s based on names if + provided when the checkpoint was written, but otherwise use the order those + `Checkpointable`s were declared as dependencies. + + To avoid breaking existing checkpoints when modifying a class, neither + variable names nor dependency names (the names passed to + `track_checkpointable`) may change. + + Args: + checkpointable: A `Checkpointable` which this object depends on. + name: A local name for `checkpointable`, used for loading checkpoints into + the correct objects. + overwrite: Boolean, whether silently replacing dependencies is OK. Used + for __setattr__, where throwing an error on attribute reassignment would + be inappropriate. + + Returns: + `checkpointable`, for convenience when declaring a dependency and + assigning to a member variable in one statement. + + Raises: + TypeError: If `checkpointable` does not inherit from `Checkpointable`. + ValueError: If another object is already tracked by this name. + """ + self._maybe_initialize_checkpointable() + if not isinstance(checkpointable, Checkpointable): + raise TypeError( + ("Checkpointable._track_checkpointable() passed type %s, not a " + "Checkpointable.") % (type(checkpointable),)) + new_reference = _CheckpointableReference(name=name, ref=checkpointable) + if (name in self._dependency_names + and self._dependency_names[name] is not checkpointable): + if not overwrite: + raise ValueError( + ("Called Checkpointable._track_checkpointable() with name='%s', " + "but a Checkpointable with this name is already declared as a " + "dependency. Names must be unique (or overwrite=True).") % (name,)) + # This is a weird thing to do, but we're not going to stop people from + # using __setattr__. 
+ for index, (old_name, _) in enumerate(self._checkpoint_dependencies): + if name == old_name: + self._checkpoint_dependencies[index] = new_reference + else: + self._checkpoint_dependencies.append(new_reference) + + self._dependency_names[name] = checkpointable + deferred_dependency_list = self._deferred_dependencies.pop(name, None) + if deferred_dependency_list is not None: + for checkpoint_position in deferred_dependency_list: + checkpoint_position.restore(checkpointable=checkpointable) + return checkpointable + + def _restore_from_checkpoint_position(self, checkpoint_position): + """Restore this object and its dependencies (may be deferred).""" + # Attempt a breadth-first traversal, since presumably the user has more + # control over shorter paths. If we don't have all of the dependencies at + # this point, the end result is not breadth-first (since other deferred + # traversals will happen later). + visit_queue = collections.deque([checkpoint_position]) + restore_ops = [] + while visit_queue: + current_position = visit_queue.popleft() + restore_ops.extend(nest.flatten( + current_position.checkpointable # pylint: disable=protected-access + ._single_restoration_from_checkpoint_position( + checkpoint_position=current_position, + visit_queue=visit_queue))) + return restore_ops + + def _single_restoration_from_checkpoint_position( + self, checkpoint_position, visit_queue): + """Restore this object, and either queue its dependencies or defer them.""" + self._maybe_initialize_checkpointable() + checkpoint = checkpoint_position.checkpoint + # If the UID of this restore is lower than our current update UID, we don't + # need to actually restore the object. However, we should pass the + # restoration on to our dependencies. 
+ if checkpoint.restore_uid > self._update_uid: + restore_op = self._scatter_tensors_from_checkpoint( + checkpoint_position.restore_ops()) + self._update_uid = checkpoint.restore_uid + else: + restore_op = () + for child in checkpoint_position.object_proto.children: + child_position = _CheckpointPosition( + checkpoint=checkpoint, + proto_id=child.node_id) + local_object = self._dependency_names.get(child.local_name, None) + if local_object is None: + # We don't yet have a dependency registered with this name. Save it + # in case we do. + self._deferred_dependencies.setdefault(child.local_name, []).append( + child_position) + else: + if child_position.bind_object(checkpointable=local_object): + # This object's correspondence is new, so dependencies need to be + # visited. Delay doing it so that we get a breadth-first dependency + # resolution order (shallowest paths first). The caller is responsible + # for emptying visit_queue. + visit_queue.append(child_position) + return restore_op + + def _scatter_tensors_from_checkpoint(self, attributes): + """Restores this object from a checkpoint. + + Args: + attributes: A dictionary of Tensors, with key corresponding to those + returned from _gather_tensors_for_checkpoint. + Returns: + A restore op to run (if graph building). + """ + if attributes: + raise AssertionError( + ("A Checkpointable object which was not expecting any data received " + "some from a checkpoint. (Got %s)") % (attributes,)) + return () # No restore ops + + def _gather_tensors_for_checkpoint(self): + """Returns a dictionary of Tensors to save with this object.""" + return {} diff --git a/tensorflow/python/training/checkpointable_test.py b/tensorflow/python/training/checkpointable_test.py new file mode 100644 index 0000000000..e79acb4975 --- /dev/null +++ b/tensorflow/python/training/checkpointable_test.py @@ -0,0 +1,39 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.platform import test +from tensorflow.python.training import checkpointable + + +class InterfaceTests(test.TestCase): + + def testMultipleAssignment(self): + root = checkpointable.Checkpointable() + root.leaf = checkpointable.Checkpointable() + root.leaf = root.leaf + duplicate_name_dep = checkpointable.Checkpointable() + with self.assertRaises(ValueError): + root._track_checkpointable(duplicate_name_dep, name="leaf") + # No error; we're overriding __setattr__, so we can't really stop people + # from doing this while maintaining backward compatibility. 
+ root.leaf = duplicate_name_dep + root._track_checkpointable(duplicate_name_dep, name="leaf", overwrite=True) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index f05c40b32d..762658175a 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -34,6 +34,7 @@ from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables +from tensorflow.python.training import checkpointable from tensorflow.python.training import slot_creator from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export @@ -212,7 +213,7 @@ def _get_processor(v): @tf_export("train.Optimizer") -class Optimizer(object): +class Optimizer(checkpointable.Checkpointable): """Base class for optimizers. This class defines the API to add Ops to train a model. You never use this @@ -924,3 +925,47 @@ class Optimizer(object): if _var_key(var) not in named_slots: named_slots[_var_key(var)] = slot_creator.create_zeros_slot(var, op_name) return named_slots[_var_key(var)] + + def _process_slot_restoration( + self, slot_variable_position, slot_name, variable): + """Restore a slot variable's value (creating it if necessary). + + Args: + slot_variable_position: A `checkpointable._CheckpointPosition` object + indicating the slot variable `Checkpointable` object to be restored. + slot_name: The name of this `Optimizer`'s slot to restore into. + variable: The variable object this slot is being created for. 
+ """ + named_slots = self._slot_dict(slot_name) + variable_key = _var_key(variable) + slot_variable = named_slots.get(variable_key, None) + if slot_variable is None: + if slot_variable_position.is_simple_variable(): + initializer = checkpointable.CheckpointInitialValue( + checkpoint_position=slot_variable_position) + slot_variable = self._get_or_make_slot( + var=variable, + val=initializer, + slot_name=slot_name, + op_name=self._name) + if slot_variable._update_uid == slot_variable_position.restore_uid: # pylint: disable=protected-access + # If our restoration was set (not given with custom getters), run + # it. Otherwise wait for the restore() call below to restore if + # necessary. + session = slot_variable_position.checkpoint.session + if session: + session.run(slot_variable.initializer) + + else: + raise NotImplementedError( + "Currently only variables with no dependencies can be loaded as " + "slot variables. File a feature request if this limitation bothers " + "you. (Got %s)" % (slot_variable_position,)) + # Slot variables are not owned by any one object (because we don't want to + # save the slot variable if the optimizer is saved without the non-slot + # variable, or if the non-slot variable is saved without the optimizer; + # it's a dependency hypergraph with edges of the form (optimizer, non-slot + # variable, variable)). So we don't _track_ slot variables anywhere, and + # instead special-case this dependency and otherwise pretend it's a normal + # graph. 
+ slot_variable_position.restore(slot_variable) diff --git a/tensorflow/tools/api/golden/tensorflow.-variable.pbtxt b/tensorflow/tools/api/golden/tensorflow.-variable.pbtxt index bc7cf7267f..069200065a 100644 --- a/tensorflow/tools/api/golden/tensorflow.-variable.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-variable.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.Variable" tf_class { is_instance: "" + is_instance: "" is_instance: "" member { name: "SaveSliceInfo" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt index 863beaea4c..4eea52596a 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.AdadeltaOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt index 0a7aa9b6bc..5aaaf0e20b 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.AdagradDAOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt index 83724fea55..7f1201879c 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.AdagradOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: 
"GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt index e285b27a05..503c439d83 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.AdamOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt index fc28577d6e..39c071748c 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.FtrlOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt index bf3c1d81f8..6b441786ca 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.GradientDescentOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt index a640c8d2c6..80f3963bac 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.MomentumOptimizer" tf_class { is_instance: "" is_instance: "" + 
is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt index 6b33c236a3..c880ba328a 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.train.Optimizer" tf_class { is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt index d23fcaed7b..6acdf35f78 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.ProximalAdagradOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt index b6c03e71d9..00b1e309e3 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.ProximalGradientDescentOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt index 4a82db11cb..05dc391cab 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.RMSPropOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt index e9131bf544..4be2819261 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.train.SyncReplicasOptimizer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "GATE_GRAPH" diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index d9d7929959..791016e8b7 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -149,7 +149,7 @@ sh_binary( "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test", "//tensorflow/contrib/data/python/ops:contrib_op_loader", "//tensorflow/contrib/eager/python/examples:examples_pip", - "//tensorflow/contrib/eager/python:checkpointable", + "//tensorflow/contrib/eager/python:checkpointable_utils", "//tensorflow/contrib/eager/python:evaluator", "//tensorflow/contrib/gan:gan", "//tensorflow/contrib/graph_editor:graph_editor_pip", -- GitLab From 7905d2ae09e20ce628773f319229e21202d4379a Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 15 Feb 2018 14:08:54 -0800 Subject: [PATCH 0074/2939] Update tf.keras to version 2.1.4. 
PiperOrigin-RevId: 185897606 --- .../keras/applications/imagenet_utils.py | 3 ++- .../_impl/keras/applications/mobilenet.py | 4 ++-- .../python/keras/_impl/keras/constraints.py | 3 ++- .../keras/_impl/keras/datasets/cifar.py | 21 +++++++++---------- .../python/keras/_impl/keras/datasets/imdb.py | 6 ++---- .../python/keras/_impl/keras/initializers.py | 3 ++- .../keras/layers/convolutional_recurrent.py | 4 ++-- .../python/keras/_impl/keras/layers/local.py | 4 ++-- 8 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py b/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py index d9cb726137..c26a28ed40 100644 --- a/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py +++ b/tensorflow/python/keras/_impl/keras/applications/imagenet_utils.py @@ -234,7 +234,8 @@ def decode_predictions(preds, top=5): CLASS_INDEX_PATH, cache_subdir='models', file_hash='c2c37ea517e94d9795004a39431a14cb') - CLASS_INDEX = json.load(open(fpath)) + with open(fpath) as f: + CLASS_INDEX = json.load(f) results = [] for pred in preds: top_indices = pred.argsort()[-top:][::-1] diff --git a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py index 027ae26113..1bbbedb85e 100644 --- a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py +++ b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py @@ -561,7 +561,7 @@ def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): and width and height should be no smaller than 32. E.g. `(224, 224, 3)` would be one valid value. filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). alpha: controls the width of the network. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. 
@@ -627,7 +627,7 @@ def _depthwise_conv_block(inputs, (with `channels_last` data format) or (channels, rows, cols) (with `channels_first` data format). pointwise_conv_filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the pointwise convolution). + (i.e. the number of output filters in the pointwise convolution). alpha: controls the width of the network. - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. diff --git a/tensorflow/python/keras/_impl/keras/constraints.py b/tensorflow/python/keras/_impl/keras/constraints.py index ab62d575e3..271fbbb63d 100644 --- a/tensorflow/python/keras/_impl/keras/constraints.py +++ b/tensorflow/python/keras/_impl/keras/constraints.py @@ -202,4 +202,5 @@ def get(identifier): elif callable(identifier): return identifier else: - raise ValueError('Could not interpret constraint identifier:', identifier) + raise ValueError('Could not interpret constraint identifier: ' + + str(identifier)) diff --git a/tensorflow/python/keras/_impl/keras/datasets/cifar.py b/tensorflow/python/keras/_impl/keras/datasets/cifar.py index 7ada3340a5..02344897f7 100644 --- a/tensorflow/python/keras/_impl/keras/datasets/cifar.py +++ b/tensorflow/python/keras/_impl/keras/datasets/cifar.py @@ -34,17 +34,16 @@ def load_batch(fpath, label_key='labels'): Returns: A tuple `(data, labels)`. 
""" - f = open(fpath, 'rb') - if sys.version_info < (3,): - d = cPickle.load(f) - else: - d = cPickle.load(f, encoding='bytes') - # decode utf8 - d_decoded = {} - for k, v in d.items(): - d_decoded[k.decode('utf8')] = v - d = d_decoded - f.close() + with open(fpath, 'rb') as f: + if sys.version_info < (3,): + d = cPickle.load(f) + else: + d = cPickle.load(f, encoding='bytes') + # decode utf8 + d_decoded = {} + for k, v in d.items(): + d_decoded[k.decode('utf8')] = v + d = d_decoded data = d['data'] labels = d[label_key] diff --git a/tensorflow/python/keras/_impl/keras/datasets/imdb.py b/tensorflow/python/keras/_impl/keras/datasets/imdb.py index e2dddf7730..7467bb2464 100644 --- a/tensorflow/python/keras/_impl/keras/datasets/imdb.py +++ b/tensorflow/python/keras/_impl/keras/datasets/imdb.py @@ -144,7 +144,5 @@ def get_word_index(path='imdb_word_index.json'): path, origin='https://s3.amazonaws.com/text-datasets/imdb_word_index.json', file_hash='bfafd718b763782e994055a2d397834f') - f = open(path) - data = json.load(f) - f.close() - return data + with open(path) as f: + return json.load(f) diff --git a/tensorflow/python/keras/_impl/keras/initializers.py b/tensorflow/python/keras/_impl/keras/initializers.py index 338c669f97..300bed5e14 100644 --- a/tensorflow/python/keras/_impl/keras/initializers.py +++ b/tensorflow/python/keras/_impl/keras/initializers.py @@ -209,4 +209,5 @@ def get(identifier): elif callable(identifier): return identifier else: - raise ValueError('Could not interpret initializer identifier:', identifier) + raise ValueError('Could not interpret initializer identifier: ' + + str(identifier)) diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py index a04c3a24bf..d2792b9636 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py @@ -39,7 +39,7 @@ class 
ConvRecurrent2D(Recurrent): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of n integers, specifying the dimensions of the convolution window. strides: An integer or tuple/list of n integers, @@ -200,7 +200,7 @@ class ConvLSTM2D(ConvRecurrent2D): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of n integers, specifying the dimensions of the convolution window. strides: An integer or tuple/list of n integers, diff --git a/tensorflow/python/keras/_impl/keras/layers/local.py b/tensorflow/python/keras/_impl/keras/layers/local.py index 798ac236a3..df0efe6b8b 100644 --- a/tensorflow/python/keras/_impl/keras/layers/local.py +++ b/tensorflow/python/keras/_impl/keras/layers/local.py @@ -53,7 +53,7 @@ class LocallyConnected1D(Layer): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of a single integer, specifying the length of the 1D convolution window. strides: An integer or tuple/list of a single integer, @@ -222,7 +222,7 @@ class LocallyConnected2D(Layer): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of 2 integers, specifying the width and height of the 2D convolution window. 
Can be a single integer to specify the same value for -- GitLab From a805116366eddcaa8eb6a602398f8efae076e0b5 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 15 Feb 2018 14:24:40 -0800 Subject: [PATCH 0075/2939] [tf.data] Return OK and set `*end_of_sequence = true` when repeating an empty dataset. Returning an error status could lead to situations (like `empty_ds.repeat(None).interleave(...)`) where the wrong exception was raised. This change ensures that the proper `OutOfRangeError` is raised in the user program. PiperOrigin-RevId: 185900119 --- tensorflow/core/kernels/data/repeat_dataset_op.cc | 11 +++++------ tensorflow/core/kernels/data/shuffle_dataset_op.cc | 11 +++++------ .../kernel_tests/interleave_dataset_op_test.py | 14 ++++++++++++++ .../data/kernel_tests/sequence_dataset_op_test.py | 4 +--- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/kernels/data/repeat_dataset_op.cc b/tensorflow/core/kernels/data/repeat_dataset_op.cc index 1cb533158b..d37086541d 100644 --- a/tensorflow/core/kernels/data/repeat_dataset_op.cc +++ b/tensorflow/core/kernels/data/repeat_dataset_op.cc @@ -187,12 +187,11 @@ class RepeatDatasetOp : public UnaryDatasetOpKernel { } else { input_impl_.reset(); if (first_call) { - // If the first call to GetNext() fails because the end of - // sequence has been reached, we return an OutOfRange error to - // terminate the iteration. (Otherwise, this iterator would loop - // infinitely and never produce a value.) - return errors::OutOfRange( - "Attempted to repeat an empty dataset infinitely."); + // If the first call to GetNext() fails because the end + // of sequence has been reached, we terminate the + // iteration immediately. (Otherwise, this iterator + // would loop infinitely and never produce a value.) 
+ return Status::OK(); } } } while (true); diff --git a/tensorflow/core/kernels/data/shuffle_dataset_op.cc b/tensorflow/core/kernels/data/shuffle_dataset_op.cc index 1dde236c17..2f6bf83da5 100644 --- a/tensorflow/core/kernels/data/shuffle_dataset_op.cc +++ b/tensorflow/core/kernels/data/shuffle_dataset_op.cc @@ -104,13 +104,12 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel { break; } if (first_call && dataset()->count_ == -1) { - // If the first call to GetNext() fails because the end of - // sequence has been reached, we return an OutOfRange error to - // terminate the iteration. (Otherwise, this iterator may loop - // infinitely and never produce a value.) + // If the first call to GetNext() fails because the end + // of sequence has been reached, we terminate the + // iteration immediately. (Otherwise, this iterator + // would loop infinitely and never produce a value.) *end_of_sequence = true; - return errors::OutOfRange( - "Attempted to repeat an empty dataset infinitely."); + return Status::OK(); } epoch_++; int64 n = slices_.back()->end; diff --git a/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py b/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py index 28cb50c002..7dbf7268d7 100644 --- a/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/interleave_dataset_op_test.py @@ -201,6 +201,20 @@ class InterleaveDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testEmptyInput(self): + iterator = ( + dataset_ops.Dataset.from_tensor_slices([]) + .repeat(None) + .interleave(dataset_ops.Dataset.from_tensors, cycle_length=2) + .make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + if __name__ == "__main__": test.main() diff --git 
a/tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py b/tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py index ae08032e19..1d27b036eb 100644 --- a/tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/sequence_dataset_op_test.py @@ -201,9 +201,7 @@ class SequenceDatasetTest(test.TestCase): with self.test_session() as sess: sess.run(init_op) - with self.assertRaisesRegexp( - errors.OutOfRangeError, - "Attempted to repeat an empty dataset infinitely."): + with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) -- GitLab From 66f4f4cf31b86b7dd20f10ce6d968348b502f2ee Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 14:25:00 -0800 Subject: [PATCH 0076/2939] Automated g4 rollback of changelist 185072479 PiperOrigin-RevId: 185900165 --- .../grappler/optimizers/constant_folding.cc | 36 +++++++++++++++---- .../grappler/optimizers/constant_folding.h | 2 ++ .../optimizers/constant_folding_test.cc | 30 +++++++--------- tensorflow/core/kernels/snapshot_op.h | 17 +++++---- tensorflow/python/grappler/cluster_test.py | 4 +-- 5 files changed, 58 insertions(+), 31 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 1e6f11c8aa..8f89f2ae64 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1375,6 +1375,29 @@ void ConstantFolding::ReplaceOperationWithIdentity(int input_to_forward, graph_modified_ = true; } +void ConstantFolding::ReplaceOperationWithSnapshot(int input_to_forward, + NodeDef* node, + GraphDef* graph) { + node->set_op("Snapshot"); + DataType dtype = node->attr().at("T").type(); + node->clear_attr(); + (*node->mutable_attr())["T"].set_type(dtype); + + // Propagate the designated input through the Snapshot. 
+ node->mutable_input()->SwapElements(0, input_to_forward); + // Add all other inputs as control dependencies. + for (int i = 1; i < node->input_size(); ++i) { + if (IsControlInput(node->input(i))) { + break; + } + const string ctrl_dep = + AddControlDependency(node->input(i), graph, node_map_.get()); + node_map_->UpdateInput(node->name(), node->input(i), ctrl_dep); + node->set_input(i, ctrl_dep); + } + graph_modified_ = true; +} + void ConstantFolding::ReplaceDivisionOfOnesByReciprocal(NodeDef* node, GraphDef* graph) { node->set_op("Reciprocal"); @@ -1443,15 +1466,14 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, graph_modified_ = true; continue; } - const bool safe_to_use_shapes = - use_shape_info && (feed_nodes_.empty() || is_aggressive); + const bool is_mul = IsMul(*node); const bool is_matmul = IsMatMul(*node); const bool is_add = IsAdd(*node) || IsBiasAdd(*node); const bool is_sub = IsSub(*node); const bool is_any_div = IsAnyDiv(*node); // Simplify arithmetic operations with ones or zeros. - if (safe_to_use_shapes && + if (use_shape_info && (is_mul || is_matmul || is_add || is_sub || is_any_div) && properties.HasInputProperties(node->name()) && properties.HasOutputProperties(node->name())) { @@ -1475,7 +1497,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, ((is_mul && x_is_one) || (is_add && x_is_zero))) { // TODO(rmlarsen): Handle subtraction 0 - y. // 1 * y = y or 0 + y = y. 
- ReplaceOperationWithIdentity(1, node, output); + ReplaceOperationWithSnapshot(1, node, output); continue; } @@ -1495,9 +1517,9 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape); if (x_matches_output_shape && (((is_mul || is_any_div) && y_is_one) || - ((is_add || is_sub) && y_is_zero && is_aggressive))) { + ((is_add || is_sub) && y_is_zero))) { // x * 1 = x or x / 1 = x or x +/- 0 = x - ReplaceOperationWithIdentity(0, node, output); + ReplaceOperationWithSnapshot(0, node, output); continue; } @@ -1690,6 +1712,7 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* output) { + LOG(INFO) << "Graph before: " << item.graph.DebugString(); nodes_to_preserve_ = item.NodesToPreserve(); for (const auto& feed : item.feed) { feed_nodes_.insert(NodeName(feed.first)); @@ -1716,6 +1739,7 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, *output->mutable_library() = item.graph.library(); *output->mutable_versions() = item.graph.versions(); + LOG(INFO) << "Graph after: " << output->DebugString(); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 18acc91e8a..e4078514af 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -79,6 +79,8 @@ class ConstantFolding : public GraphOptimizer { bool IsZeros(const NodeDef& node) const; void ReplaceOperationWithIdentity(int input_to_forward, NodeDef* node, GraphDef* graph); + void ReplaceOperationWithSnapshot(int input_to_forward, NodeDef* node, + GraphDef* graph); Status ReplaceOperationWithConstant(double value, const TensorShapeProto& shape, NodeDef* node, GraphDef* graph); diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc 
b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 46998dcc91..d8df19fe6a 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -195,8 +195,7 @@ TEST_F(ConstantFoldingTest, NeutralElement) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch = {"addn", "matmul3", "matmul4"}; - ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, - nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -214,11 +213,11 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ("^zeros", node.input(0)); EXPECT_EQ("^y", node.input(1)); } else if (name == "mul3") { - EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("Snapshot", node.op()); EXPECT_EQ("x", node.input(0)); EXPECT_EQ("^ones", node.input(1)); } else if (name == "mul4") { - EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("Snapshot", node.op()); EXPECT_EQ("y", node.input(0)); EXPECT_EQ("^ones", node.input(1)); } else if (name == "mul5") { @@ -230,7 +229,7 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ("^zeros_1d", node.input(0)); EXPECT_EQ("^y", node.input(1)); } else if (name == "div1") { - EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("Snapshot", node.op()); EXPECT_EQ("x", node.input(0)); EXPECT_EQ("^ones", node.input(1)); } else if (name == "div2") { @@ -266,15 +265,15 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ(2, t.tensor_shape().dim(0).size()); EXPECT_EQ(3, t.tensor_shape().dim(1).size()); } else if (name == "add1") { - EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("Snapshot", node.op()); EXPECT_EQ("x", node.input(0)); EXPECT_EQ("^zeros", node.input(1)); } else if (name == "add2") { - EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("Snapshot", node.op()); EXPECT_EQ("y", node.input(0)); EXPECT_EQ("^zeros", node.input(1)); } else if (name == "bias_add1") { - 
EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("Snapshot", node.op()); EXPECT_EQ("x", node.input(0)); EXPECT_EQ("^zeros_1d", node.input(1)); } else if (name == "bias_add2") { @@ -283,7 +282,7 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ("zeros", node.input(0)); EXPECT_EQ("bias", node.input(1)); } else if (name == "sub1") { - EXPECT_EQ("Identity", node.op()); + EXPECT_EQ("Snapshot", node.op()); EXPECT_EQ("x", node.input(0)); EXPECT_EQ("^zeros", node.input(1)); } else if (name == "sub2") { @@ -322,8 +321,7 @@ TEST_F(ConstantFoldingTest, StrengthReduce_Reciprocal) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch = {"div_f", "div_i", "realdiv"}; - ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, - nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -413,8 +411,7 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_UnknownOutputShape) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, - nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -468,8 +465,7 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_KnownOutputShape) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding optimizer(RewriterConfig::AGGRESSIVE, - nullptr /* cpu_device */); + ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -1337,7 +1333,7 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs) { GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */); + ConstantFolding fold(nullptr /* cpu_device */); GraphDef 
output; Status status = fold.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -1398,7 +1394,7 @@ TEST_F(ConstantFoldingTest, MaterializeReductionIndices) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch.push_back("reshape"); - ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */); + ConstantFolding fold(nullptr /* cpu_device */); GraphDef output; Status status = fold.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); diff --git a/tensorflow/core/kernels/snapshot_op.h b/tensorflow/core/kernels/snapshot_op.h index 2c79893b49..b94834f159 100644 --- a/tensorflow/core/kernels/snapshot_op.h +++ b/tensorflow/core/kernels/snapshot_op.h @@ -35,12 +35,17 @@ class SnapshotOp : public OpKernel { void Compute(OpKernelContext* context) override { const Tensor& input = context->input(0); Tensor* output = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(0, input.shape(), &output)); - const Device& device = context->eigen_device(); - device.memcpy(output->template flat().data(), - input.template flat().data(), - input.NumElements() * sizeof(Scalar)); + // Try to use buffer forwarding to avoid an explicit copy. + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {0}, 0, input.shape(), &output)); + if (!output->SharesBufferWith(input)) { + // We had to allocate a new buffer since the refcount on the input was + // greater than 1. Copy the input to the new buffer. 
+ const Device& device = context->eigen_device(); + device.memcpy(output->template flat().data(), + input.template flat().data(), + input.NumElements() * sizeof(Scalar)); + } } }; diff --git a/tensorflow/python/grappler/cluster_test.py b/tensorflow/python/grappler/cluster_test.py index 10d515a364..caae5b114e 100644 --- a/tensorflow/python/grappler/cluster_test.py +++ b/tensorflow/python/grappler/cluster_test.py @@ -45,7 +45,7 @@ class ClusterTest(test.TestCase): op_perfs, run_time, step_stats = grappler_cluster.MeasureCosts( grappler_item) self.assertTrue(run_time > 0) - self.assertEqual(len(op_perfs), 7) + self.assertEqual(len(op_perfs), 8) self.assertTrue(step_stats.dev_stats) def testNoDetailedStats(self): @@ -125,7 +125,7 @@ class ClusterTest(test.TestCase): disable_detailed_stats=False, disable_timeline=False) as gcluster: op_perfs, run_time, step_stats = gcluster.MeasureCosts(grappler_item) self.assertTrue(run_time > 0) - self.assertEqual(len(op_perfs), 7) + self.assertEqual(len(op_perfs), 8) self.assertTrue(step_stats.dev_stats) def testAvailableOps(self): -- GitLab From 41a8560e0c2fa3b8fa73622a15da53f5e1b8b6c8 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 15 Feb 2018 14:49:01 -0800 Subject: [PATCH 0077/2939] Implement Split PiperOrigin-RevId: 185904437 --- tensorflow/contrib/lite/builtin_op_data.h | 4 + tensorflow/contrib/lite/kernels/BUILD | 13 ++ .../internal/reference/reference_ops.h | 49 +++--- tensorflow/contrib/lite/kernels/register.cc | 2 + tensorflow/contrib/lite/kernels/split.cc | 159 ++++++++++++++++++ tensorflow/contrib/lite/kernels/split_test.cc | 147 ++++++++++++++++ tensorflow/contrib/lite/kernels/test_util.cc | 1 + tensorflow/contrib/lite/model.cc | 8 + tensorflow/contrib/lite/nnapi_delegate.cc | 1 + tensorflow/contrib/lite/schema/schema.fbs | 8 +- .../contrib/lite/schema/schema_generated.h | 141 +++++++++++++++- tensorflow/contrib/lite/testing/BUILD | 1 + .../contrib/lite/testing/generate_examples.py | 28 ++- .../testing/generated_examples_zip_test.cc | 1 + .../contrib/lite/toco/tflite/operator.cc | 23 ++- 15 files changed, 551 insertions(+), 35 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/split.cc create mode 100644 tensorflow/contrib/lite/kernels/split_test.cc diff --git a/tensorflow/contrib/lite/builtin_op_data.h b/tensorflow/contrib/lite/builtin_op_data.h index 5dbeadd165..5fc8954743 100644 --- a/tensorflow/contrib/lite/builtin_op_data.h +++ b/tensorflow/contrib/lite/builtin_op_data.h @@ -195,6 +195,10 @@ typedef struct { bool keep_dims; } TfLiteMeanParams; +typedef struct { + int num_splits; +} TfLiteSplitParams; + typedef struct { // TODO(ahentz): We can't have dynamic data in this struct, at least not yet. // For now we will fix the maximum possible number of dimensions. 
diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index d80c8bb671..b59dc5ffb3 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -129,6 +129,7 @@ cc_library( "skip_gram.cc", "space_to_batch_nd.cc", "space_to_depth.cc", + "split.cc", "squeeze.cc", "strided_slice.cc", "sub.cc", @@ -574,6 +575,18 @@ tf_cc_test( ], ) +tf_cc_test( + name = "split_test", + size = "small", + srcs = ["split_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + tf_cc_test( name = "squeeze_test", size = "small", diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 2e0376656a..5f4d5be323 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -1590,6 +1590,33 @@ void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims, } } +template +void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims, + int axis, int outputs_count, Scalar* const* output_data, + const Dims<4>* const* output_dims) { + const int batches = ArraySize(*output_dims[0], 3); + const int height = ArraySize(*output_dims[0], 2); + const int width = ArraySize(*output_dims[0], 1); + const int depth = ArraySize(*output_dims[0], 0); + + const int slice_size = ArraySize(*output_dims[0], axis); + + for (int i = 0; i < outputs_count; ++i) { + int offset = i * slice_size * input_dims.strides[axis]; + for (int b = 0; b < batches; ++b) { + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + for (int c = 0; c < depth; ++c) { + auto out = Offset(*output_dims[i], c, x, y, b); + auto in = Offset(input_dims, c, x, y, b); + output_data[i][out] = input_data[offset + in]; + } + } + } + } + } +} 
+ template void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims, int outputs_count, Scalar* const* output_data, @@ -1600,28 +1627,12 @@ void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims, /* height = */ MatchingArraySize(*output_dims[i], 2, input_dims, 2); /* width = */ MatchingArraySize(*output_dims[i], 1, input_dims, 1); } - const int batches = MatchingArraySize(*output_dims[0], 3, input_dims, 3); - const int height = MatchingArraySize(*output_dims[0], 2, input_dims, 2); - const int width = MatchingArraySize(*output_dims[0], 1, input_dims, 1); // for now we dont have a model with a TensorFlowSplit // with fused activation function. TFLITE_DCHECK(Ac == FusedActivationFunctionType::kNone); - for (int b = 0; b < batches; ++b) { - for (int y = 0; y < height; ++y) { - for (int x = 0; x < width; ++x) { - int in_c = 0; - for (int i = 0; i < outputs_count; ++i) { - const int depth = ArraySize(*output_dims[i], 0); - for (int c = 0; c < depth; ++c) { - output_data[i][Offset(*output_dims[i], c, x, y, b)] = - input_data[Offset(input_dims, in_c, x, y, b)]; - in_c++; - } - } - TFLITE_DCHECK(in_c == ArraySize(input_dims, 0)); - } - } - } + + TensorFlowSplit(input_data, input_dims, /*axis=*/0, outputs_count, + output_data, output_dims); } // TODO(benoitjacob) make this a proper reference impl without Eigen! 
diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index fa870ddb40..edc4e26edb 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -58,6 +58,7 @@ TfLiteRegistration* Register_SPACE_TO_DEPTH(); TfLiteRegistration* Register_GATHER(); TfLiteRegistration* Register_TRANSPOSE(); TfLiteRegistration* Register_MEAN(); +TfLiteRegistration* Register_SPLIT(); TfLiteRegistration* Register_SQUEEZE(); TfLiteRegistration* Register_STRIDED_SLICE(); TfLiteRegistration* Register_EXP(); @@ -108,6 +109,7 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_MEAN, Register_MEAN()); AddBuiltin(BuiltinOperator_DIV, Register_DIV()); AddBuiltin(BuiltinOperator_SUB, Register_SUB()); + AddBuiltin(BuiltinOperator_SPLIT, Register_SPLIT()); AddBuiltin(BuiltinOperator_SQUEEZE, Register_SQUEEZE()); AddBuiltin(BuiltinOperator_STRIDED_SLICE, Register_STRIDED_SLICE()); AddBuiltin(BuiltinOperator_EXP, Register_EXP()); diff --git a/tensorflow/contrib/lite/kernels/split.cc b/tensorflow/contrib/lite/kernels/split.cc new file mode 100644 index 0000000000..b524c79f87 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/split.cc @@ -0,0 +1,159 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace split { + +struct OpContext { + OpContext(TfLiteContext* context, TfLiteNode* node) { + params = reinterpret_cast(node->builtin_data); + axis = GetInput(context, node, 0); + input = GetInput(context, node, 1); + } + TfLiteSplitParams* params; + TfLiteTensor* axis; + TfLiteTensor* input; +}; + +TfLiteStatus UseDynamicOutputTensors(TfLiteContext* context, TfLiteNode* node) { + for (int i = 0; i < NumOutputs(node); ++i) { + SetTensorToDynamic(GetOutput(context, node, i)); + } + return kTfLiteOk; +} + +TfLiteStatus ResizeOutputTensors(TfLiteContext* context, TfLiteNode* node, + TfLiteTensor* axis, TfLiteTensor* input, + int num_splits) { + int axis_value = GetTensorData(axis)[0]; + if (axis_value < 0) { + axis_value += NumDimensions(input); + } + + const int input_size = SizeOfDimension(input, axis_value); + TF_LITE_ENSURE_MSG(context, input_size % num_splits == 0, + "Not an even split"); + const int slice_size = input_size / num_splits; + + for (int i = 0; i < NumOutputs(node); ++i) { + TfLiteIntArray* output_dims = TfLiteIntArrayCopy(input->dims); + output_dims->data[axis_value] = slice_size; + TfLiteTensor* output = GetOutput(context, node, i); + TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, output, output_dims)); + } + + return kTfLiteOk; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + + 
OpContext op_context(context, node); + + TF_LITE_ENSURE_EQ(context, NumOutputs(node), op_context.params->num_splits); + + auto input_type = op_context.input->type; + TF_LITE_ENSURE(context, + input_type == kTfLiteFloat32 || input_type == kTfLiteUInt8); + for (int i = 0; i < NumOutputs(node); ++i) { + GetOutput(context, node, i)->type = input_type; + } + + // If we know the contents of the 'axis' tensor, resize all outputs. + // Otherwise, wait until Eval(). + if (IsConstantTensor(op_context.axis)) { + return ResizeOutputTensors(context, node, op_context.axis, op_context.input, + op_context.params->num_splits); + } else { + return UseDynamicOutputTensors(context, node); + } +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + OpContext op_context(context, node); + + // When the 'axis' tensor is non-const we can't resize output tensors in + // Prepare(), and we have to do it now. + if (!IsConstantTensor(op_context.axis)) { + TF_LITE_ENSURE_OK( + context, + ResizeOutputTensors(context, node, op_context.axis, op_context.input, + op_context.params->num_splits)); + } + + int axis_value = GetTensorData(op_context.axis)[0]; + if (axis_value < 0) { + axis_value += NumDimensions(op_context.input); + } + axis_value = RemapDim(NumDimensions(op_context.input), axis_value); + + // TODO(ahentz): Our usage of VectorOfTensors could be optimized by + // calculating it in Prepare, unless we defer shape calculation. + // TODO(ahentz): We can improve the optimized_ops version to handle other + // cases too. 
+#define TF_LITE_SPLIT(scalar) \ + VectorOfTensors all_outputs(*context, *node->outputs); \ + if (axis_value == NumDimensions(op_context.input)) { \ + optimized_ops::TensorFlowSplit( \ + GetTensorData(op_context.input), \ + GetTensorDims(op_context.input), NumOutputs(node), all_outputs.data(), \ + all_outputs.dims()); \ + } else { \ + reference_ops::TensorFlowSplit( \ + GetTensorData(op_context.input), \ + GetTensorDims(op_context.input), axis_value, NumOutputs(node), \ + all_outputs.data(), all_outputs.dims()); \ + } + switch (op_context.input->type) { + case kTfLiteFloat32: { + TF_LITE_SPLIT(float); + break; + } + case kTfLiteUInt8: { + TF_LITE_SPLIT(uint8_t); + break; + } + default: + context->ReportError(context, + "Only float32 and uint8 are currently supported."); + return kTfLiteError; + } +#undef TF_LITE_SPLIT + + return kTfLiteOk; +} + +} // namespace split + +TfLiteRegistration* Register_SPLIT() { + static TfLiteRegistration r = {nullptr, nullptr, split::Prepare, split::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/split_test.cc b/tensorflow/contrib/lite/kernels/split_test.cc new file mode 100644 index 0000000000..61a0759c64 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/split_test.cc @@ -0,0 +1,147 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +constexpr int kAxisIsATensor = -1000; + +class SplitOpModel : public SingleOpModel { + public: + SplitOpModel(const TensorData& input, int num_splits, + int axis = kAxisIsATensor) { + if (axis == kAxisIsATensor) { + axis_ = AddInput({TensorType_INT32, {1}}); + } else { + axis_ = AddConstInput(TensorType_INT32, {axis}, {1}); + } + input_ = AddInput(input); + for (int i = 0; i < num_splits; ++i) { + outputs_.push_back(AddOutput(input.type)); + } + SetBuiltinOp(BuiltinOperator_SPLIT, BuiltinOptions_SplitOptions, + CreateSplitOptions(builder_, num_splits).Union()); + if (axis == kAxisIsATensor) { + BuildInterpreter({GetShape(axis_), GetShape(input_)}); + } else { + BuildInterpreter({{}, GetShape(input_)}); + } + } + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + void SetAxis(int axis) { PopulateTensor(axis_, {axis}); } + + std::vector GetOutput(int i) { + return ExtractVector(outputs_[i]); + } + std::vector GetOutputShape(int i) { return GetTensorShape(outputs_[i]); } + + private: + int input_; + int axis_; + std::vector outputs_; +}; + +using TensorValues = std::initializer_list; + +void Check(int axis, int num_splits, std::initializer_list input_shape, + std::initializer_list output_shape, + const TensorValues& input_data, + const std::vector& output_data) { + auto debug = [&](int i) { + std::stringstream ss; + ss << "for output tensor " << i << " axis=" << axis + << " and num_splits=" << num_splits; + return ss.str(); + }; + SplitOpModel m({TensorType_FLOAT32, input_shape}, num_splits); + m.SetInput(input_data); + m.SetAxis(axis); + m.Invoke(); + for (int i = 0; i 
< num_splits; ++i) { + EXPECT_THAT(m.GetOutput(i), ElementsAreArray(output_data[i])) << debug(i); + EXPECT_THAT(m.GetOutputShape(i), ElementsAreArray(output_shape)) + << debug(i); + } + + SplitOpModel const_m({TensorType_FLOAT32, input_shape}, num_splits, axis); + const_m.SetInput(input_data); + const_m.Invoke(); + for (int i = 0; i < num_splits; ++i) { + EXPECT_THAT(const_m.GetOutput(i), ElementsAreArray(output_data[i])) + << debug(i); + EXPECT_THAT(const_m.GetOutputShape(i), ElementsAreArray(output_shape)) + << debug(i); + } +} + +TEST(SplitOpTest, FourDimensional) { + Check(/*axis=*/0, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 3, 4, 5, 6, 7, 8}, + {9, 10, 11, 12, 13, 14, 15, 16}, + }); + Check(/*axis=*/1, /*num_splits=*/2, {2, 2, 2, 2}, {2, 1, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 3, 4, 9, 10, 11, 12}, + {5, 6, 7, 8, 13, 14, 15, 16}, + }); + Check(/*axis=*/2, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 1, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 5, 6, 9, 10, 13, 14}, + {3, 4, 7, 8, 11, 12, 15, 16}, + }); + Check(/*axis=*/3, /*num_splits=*/2, {2, 2, 2, 2}, {2, 2, 2, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 3, 5, 7, 9, 11, 13, 15}, + {2, 4, 6, 8, 10, 12, 14, 16}, + }); +} + +TEST(SplitOpTest, OneDimensional) { + Check(/*axis=*/0, /*num_splits=*/8, {8}, {1}, {1, 2, 3, 4, 5, 6, 7, 8}, + {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}}); +} + +TEST(SplitOpTest, NegativeAxis) { + Check(/*axis=*/-4, /*num_splits=*/2, {2, 2, 2, 2}, {1, 2, 2, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + { + {1, 2, 3, 4, 5, 6, 7, 8}, + {9, 10, 11, 12, 13, 14, 15, 16}, + }); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git 
a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc index 6f56aa6bf3..373310bd87 100644 --- a/tensorflow/contrib/lite/kernels/test_util.cc +++ b/tensorflow/contrib/lite/kernels/test_util.cc @@ -187,6 +187,7 @@ void SingleOpModel::BuildInterpreter( for (const auto& shape : input_shapes) { int input_idx = interpreter_->inputs()[i++]; if (input_idx == kOptionalTensor) continue; + if (shape.empty()) continue; CHECK(interpreter_->ResizeInputTensor(input_idx, shape) == kTfLiteOk); } CHECK(interpreter_->AllocateTensors() == kTfLiteOk) diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 92922a1460..841e96f137 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -535,6 +535,14 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, builtin_data = reinterpret_cast(params); break; } + case BuiltinOperator_SPLIT: { + auto* params = MallocPOD(); + if (auto* schema_params = op->builtin_options_as_SplitOptions()) { + params->num_splits = schema_params->num_splits(); + } + builtin_data = reinterpret_cast(params); + break; + } case BuiltinOperator_SQUEEZE: { auto* params = MallocPOD(); if (auto* schema_params = op->builtin_options_as_SqueezeOptions()) { diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index a83349d95f..02e8499f61 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -340,6 +340,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_MEAN: case tflite::BuiltinOperator_DIV: case tflite::BuiltinOperator_SUB: + case tflite::BuiltinOperator_SPLIT: case tflite::BuiltinOperator_SQUEEZE: case tflite::BuiltinOperator_STRIDED_SLICE: case tflite::BuiltinOperator_EXP: diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index 7ec19a0612..75970b4126 100644 --- 
a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -121,7 +121,8 @@ enum BuiltinOperator : byte { STRIDED_SLICE = 45, BIDIRECTIONAL_SEQUENCE_RNN = 46, EXP = 47, - TOPK_V2=48, + TOPK_V2 = 48, + SPLIT = 49, } // Options for the builtin operators. @@ -160,6 +161,7 @@ union BuiltinOptions { StridedSliceOptions, ExpOptions, TopKV2Options, + SplitOptions, } enum Padding : byte { SAME, VALID } @@ -350,6 +352,10 @@ table SqueezeOptions { squeeze_dims:[int]; } +table SplitOptions { + num_splits: int; +} + table StridedSliceOptions { begin_mask: int; end_mask: int; diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index 16cda10c51..06989c7b61 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -130,6 +130,9 @@ struct MeanOptionsT; struct SqueezeOptions; struct SqueezeOptionsT; +struct SplitOptions; +struct SplitOptionsT; + struct StridedSliceOptions; struct StridedSliceOptionsT; @@ -236,11 +239,12 @@ enum BuiltinOperator { BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN = 46, BuiltinOperator_EXP = 47, BuiltinOperator_TOPK_V2 = 48, + BuiltinOperator_SPLIT = 49, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_TOPK_V2 + BuiltinOperator_MAX = BuiltinOperator_SPLIT }; -inline BuiltinOperator (&EnumValuesBuiltinOperator())[46] { +inline BuiltinOperator (&EnumValuesBuiltinOperator())[47] { static BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -287,7 +291,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[46] { BuiltinOperator_STRIDED_SLICE, BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, BuiltinOperator_EXP, - BuiltinOperator_TOPK_V2 + BuiltinOperator_TOPK_V2, + BuiltinOperator_SPLIT }; return values; } @@ -343,6 +348,7 @@ inline const char **EnumNamesBuiltinOperator() { "BIDIRECTIONAL_SEQUENCE_RNN", "EXP", "TOPK_V2", + "SPLIT", 
nullptr }; return names; @@ -389,11 +395,12 @@ enum BuiltinOptions { BuiltinOptions_StridedSliceOptions = 32, BuiltinOptions_ExpOptions = 33, BuiltinOptions_TopKV2Options = 34, + BuiltinOptions_SplitOptions = 35, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_TopKV2Options + BuiltinOptions_MAX = BuiltinOptions_SplitOptions }; -inline BuiltinOptions (&EnumValuesBuiltinOptions())[35] { +inline BuiltinOptions (&EnumValuesBuiltinOptions())[36] { static BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -429,7 +436,8 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[35] { BuiltinOptions_SequenceRNNOptions, BuiltinOptions_StridedSliceOptions, BuiltinOptions_ExpOptions, - BuiltinOptions_TopKV2Options + BuiltinOptions_TopKV2Options, + BuiltinOptions_SplitOptions }; return values; } @@ -471,6 +479,7 @@ inline const char **EnumNamesBuiltinOptions() { "StridedSliceOptions", "ExpOptions", "TopKV2Options", + "SplitOptions", nullptr }; return names; @@ -621,6 +630,10 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_TopKV2Options; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_SplitOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -924,6 +937,14 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_TopKV2Options ? reinterpret_cast(value) : nullptr; } + SplitOptionsT *AsSplitOptions() { + return type == BuiltinOptions_SplitOptions ? + reinterpret_cast(value) : nullptr; + } + const SplitOptionsT *AsSplitOptions() const { + return type == BuiltinOptions_SplitOptions ? 
+ reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -3391,6 +3412,60 @@ inline flatbuffers::Offset CreateSqueezeOptionsDirect( flatbuffers::Offset CreateSqueezeOptions(flatbuffers::FlatBufferBuilder &_fbb, const SqueezeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct SplitOptionsT : public flatbuffers::NativeTable { + typedef SplitOptions TableType; + int32_t num_splits; + SplitOptionsT() + : num_splits(0) { + } +}; + +struct SplitOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef SplitOptionsT NativeTableType; + enum { + VT_NUM_SPLITS = 4 + }; + int32_t num_splits() const { + return GetField(VT_NUM_SPLITS, 0); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_NUM_SPLITS) && + verifier.EndTable(); + } + SplitOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(SplitOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct SplitOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_num_splits(int32_t num_splits) { + fbb_.AddElement(SplitOptions::VT_NUM_SPLITS, num_splits, 0); + } + explicit SplitOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + SplitOptionsBuilder &operator=(const SplitOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateSplitOptions( + flatbuffers::FlatBufferBuilder &_fbb, + int32_t num_splits = 0) { + SplitOptionsBuilder builder_(_fbb); + 
builder_.add_num_splits(num_splits); + return builder_.Finish(); +} + +flatbuffers::Offset CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct StridedSliceOptionsT : public flatbuffers::NativeTable { typedef StridedSliceOptions TableType; int32_t begin_mask; @@ -3712,6 +3787,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const TopKV2Options *builtin_options_as_TopKV2Options() const { return builtin_options_type() == BuiltinOptions_TopKV2Options ? static_cast(builtin_options()) : nullptr; } + const SplitOptions *builtin_options_as_SplitOptions() const { + return builtin_options_type() == BuiltinOptions_SplitOptions ? static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -3874,6 +3952,10 @@ template<> inline const TopKV2Options *Operator::builtin_options_as inline const SplitOptions *Operator::builtin_options_as() const { + return builtin_options_as_SplitOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -5269,6 +5351,32 @@ inline flatbuffers::Offset CreateSqueezeOptions(flatbuffers::Fla _squeeze_dims); } +inline SplitOptionsT *SplitOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new SplitOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void SplitOptions::UnPackTo(SplitOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = num_splits(); _o->num_splits = _e; }; +} + +inline flatbuffers::Offset SplitOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SplitOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateSplitOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateSplitOptions(flatbuffers::FlatBufferBuilder &_fbb, const 
SplitOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SplitOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _num_splits = _o->num_splits; + return tflite::CreateSplitOptions( + _fbb, + _num_splits); +} + inline StridedSliceOptionsT *StridedSliceOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new StridedSliceOptionsT(); UnPackTo(_o, _resolver); @@ -5623,6 +5731,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_SplitOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } } @@ -5777,6 +5889,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_SplitOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -5919,6 +6035,10 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateTopKV2Options(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_SplitOptions: { + auto ptr = reinterpret_cast(value); + return CreateSplitOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -6061,6 +6181,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new TopKV2OptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_SplitOptions: { + value = new SplitOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -6238,6 +6362,11 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_SplitOptions: { + auto ptr = 
reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index d9e269f593..06570ae9aa 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -46,6 +46,7 @@ gen_zipped_test_files( "softmax.zip", "space_to_batch_nd.zip", "space_to_depth.zip", + "split.zip", "squeeze.zip", "strided_slice.zip", "sub.zip", diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index f0b4fcbd52..1ced3bfd73 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -103,6 +103,8 @@ KNOWN_BUGS = { r"div.*int32": "72051395", # TOCO require matching dimensions in strided_slice. r"strided_slice.*begin=\[0\].*end=\[1\].*": "73170889", + # No support for SplitV + r"split.*num_or_size_splits=\[2,2\]": "73377559", } @@ -1030,8 +1032,31 @@ def make_depthwiseconv_tests(zip_path): make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) +def make_split_tests(zip_path): + """Make a set of tests to do tf.split.""" + + test_parameters = [{ + "input_shape": [[1, 3, 4, 6], [2, 4, 1], [6, 4], [8]], + "num_or_size_splits": [1, 2, 3, 4, 5, [2, 2]], + "axis": [0, 1, 2, 3, -4, -3, -2, -1], + }] + + def build_graph(parameters): + input_tensor = tf.placeholder( + dtype=tf.float32, name="input", shape=parameters["input_shape"]) + out = tf.split( + input_tensor, parameters["num_or_size_splits"], parameters["axis"]) + return [input_tensor], out + + def build_inputs(parameters, sess, inputs, outputs): + values = [create_tensor_data(np.float32, parameters["input_shape"])] + return values, sess.run(outputs, feed_dict=dict(zip(inputs, values))) + + make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs) + + def make_concatenation_tests(zip_path): - """Make a set of tests to do 
concatenatinon.""" + """Make a set of tests to do concatenation.""" test_parameters = [{ "base_shape": [[1, 3, 4, 3], [3, 4]], @@ -1786,6 +1811,7 @@ def main(unused_args): "softmax.zip": make_softmax_tests, "space_to_depth.zip": make_space_to_depth_tests, "topk.zip": make_topk_tests, + "split.zip": make_split_tests, "transpose.zip": make_transpose_tests, "mean.zip": make_mean_tests, "squeeze.zip": make_squeeze_tests, diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index 80e806ab03..49766cedac 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -262,6 +262,7 @@ INSTANTIATE_TESTS(sigmoid) INSTANTIATE_TESTS(softmax) INSTANTIATE_TESTS(space_to_depth) INSTANTIATE_TESTS(sub) +INSTANTIATE_TESTS(split) INSTANTIATE_TESTS(div) INSTANTIATE_TESTS(transpose) INSTANTIATE_TESTS(mean) diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index 5f2caa5bbb..aabc7c5109 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -601,15 +601,21 @@ class Squeeze } }; -class Split : public CustomOperator { +class Split + : public BuiltinOperator { public: - using CustomOperator::CustomOperator; - void WriteOptions(const TocoOperator& op, - flexbuffers::Builder* fbb) const override { - fbb->Int("num_split", op.num_split); + using BuiltinOperator::BuiltinOperator; + + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + return ::tflite::CreateSplitOptions(*builder, op.num_split); } - void ReadOptions(const flexbuffers::Map& m, TocoOperator* op) const override { - op->num_split = m["num_split"].AsInt64(); + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + op->num_split = 
options.num_splits(); } }; @@ -813,6 +819,8 @@ std::vector> BuildOperatorList() { OperatorType::kResizeBilinear)); ops.emplace_back( new Squeeze(::tflite::BuiltinOperator_SQUEEZE, OperatorType::kSqueeze)); + ops.emplace_back(new Split(::tflite::BuiltinOperator_SPLIT, + OperatorType::kTensorFlowSplit)); ops.emplace_back(new StridedSlice(::tflite::BuiltinOperator_STRIDED_SLICE, OperatorType::kStridedSlice)); ops.emplace_back( @@ -825,7 +833,6 @@ std::vector> BuildOperatorList() { ops.emplace_back( new DepthToSpace("DEPTH_TO_SPACE", OperatorType::kDepthToSpace)); ops.emplace_back(new FakeQuant("FAKE_QUANT", OperatorType::kFakeQuant)); - ops.emplace_back(new Split("SPLIT", OperatorType::kTensorFlowSplit)); ops.emplace_back(new TensorFlowUnsupported( "TENSORFLOW_UNSUPPORTED", OperatorType::kTensorFlowUnsupported)); -- GitLab From a4dc25936dc4079c2e4e4869aa71033da977c5fb Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 15 Feb 2018 15:09:19 -0800 Subject: [PATCH 0078/2939] Update tf.keras to Keras 2.1.4 API PiperOrigin-RevId: 185908711 --- .../keras/_impl/keras/layers/recurrent.py | 51 +++++++++--- .../_impl/keras/layers/recurrent_test.py | 31 ++++++- .../keras/_impl/keras/layers/wrappers.py | 82 +++++++++++++++++-- .../keras/_impl/keras/layers/wrappers_test.py | 47 +++++++++-- ...nsorflow.keras.layers.-bidirectional.pbtxt | 4 + ...ow.keras.layers.-stacked-r-n-n-cells.pbtxt | 2 +- ...rflow.keras.layers.-time-distributed.pbtxt | 4 + .../tensorflow.keras.layers.-wrapper.pbtxt | 4 + 8 files changed, 198 insertions(+), 27 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent.py b/tensorflow/python/keras/_impl/keras/layers/recurrent.py index 4bf6ae975f..b34b92c763 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent.py @@ -87,7 +87,7 @@ class StackedRNNCells(Layer): state_size.append(cell.state_size) return tuple(state_size) - def call(self, inputs, states, **kwargs): + 
def call(self, inputs, states, constants=None, **kwargs): # Recover per-cell states. nested_states = [] for cell in self.cells[::-1]: @@ -102,7 +102,12 @@ class StackedRNNCells(Layer): # Call the cells in order and store the returned states. new_nested_states = [] for cell, states in zip(self.cells, nested_states): - inputs, states = cell.call(inputs, states, **kwargs) + if has_arg(cell.call, 'constants'): + inputs, states = cell.call(inputs, states, constants=constants, + **kwargs) + else: + inputs, states = cell.call(inputs, states, **kwargs) + new_nested_states.append(states) # Format the new states as a flat list @@ -114,9 +119,15 @@ class StackedRNNCells(Layer): @shape_type_conversion def build(self, input_shape): + if isinstance(input_shape, list): + constants_shape = input_shape[1:] + input_shape = input_shape[0] for cell in self.cells: if isinstance(cell, Layer): - cell.build(input_shape) + if has_arg(cell.call, 'constants'): + cell.build([input_shape] + constants_shape) + else: + cell.build(input_shape) if hasattr(cell.state_size, '__len__'): output_dim = cell.state_size[0] else: @@ -527,12 +538,14 @@ class RNN(Layer): self._num_constants = len(constants) additional_specs += self.constants_spec # at this point additional_inputs cannot be empty - is_keras_tensor = hasattr(additional_inputs[0], '_keras_history') + is_keras_tensor = K.is_keras_tensor(additional_inputs[0]) for tensor in additional_inputs: - if hasattr(tensor, '_keras_history') != is_keras_tensor: + if K.is_keras_tensor(tensor) != is_keras_tensor: raise ValueError('The initial state or constants of an RNN' ' layer cannot be specified with a mix of' - ' Keras tensors and non-Keras tensors') + ' Keras tensors and non-Keras tensors' + '(a "Keras tensor" is a tensor that was' + 'returned by a Keras layer, or by `Input`)') if is_keras_tensor: # Compute the full input spec, including state and constants @@ -796,7 +809,8 @@ class SimpleRNNCell(Layer): Arguments: units: Positive integer, dimensionality 
of the output space. activation: Activation function to use. - If you pass None, no activation is applied + Default: hyperbolic tangent (`tanh`). + If you pass `None`, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, @@ -966,6 +980,7 @@ class SimpleRNN(RNN): Arguments: units: Positive integer, dimensionality of the output space. activation: Activation function to use. + Default: hyperbolic tangent (`tanh`). If you pass None, no activation is applied (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. @@ -1176,10 +1191,14 @@ class GRUCell(Layer): Arguments: units: Positive integer, dimensionality of the output space. activation: Activation function to use. + Default: hyperbolic tangent (`tanh`). If you pass None, no activation is applied (ie. "linear" activation: `a(x) = x`). recurrent_activation: Activation function to use for the recurrent step. + Default: hard sigmoid (`hard_sigmoid`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs. @@ -1427,10 +1446,14 @@ class GRU(RNN): Arguments: units: Positive integer, dimensionality of the output space. activation: Activation function to use. - If you pass None, no activation is applied + Default: hyperbolic tangent (`tanh`). + If you pass `None`, no activation is applied (ie. "linear" activation: `a(x) = x`). recurrent_activation: Activation function to use for the recurrent step. + Default: hard sigmoid (`hard_sigmoid`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. 
kernel_initializer: Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs. @@ -1661,10 +1684,14 @@ class LSTMCell(Layer): Arguments: units: Positive integer, dimensionality of the output space. activation: Activation function to use. - If you pass None, no activation is applied + Default: hyperbolic tangent (`tanh`). + If you pass `None`, no activation is applied (ie. "linear" activation: `a(x) = x`). recurrent_activation: Activation function to use for the recurrent step. + Default: hard sigmoid (`hard_sigmoid`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`).x use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs. @@ -1943,10 +1970,14 @@ class LSTM(RNN): Arguments: units: Positive integer, dimensionality of the output space. activation: Activation function to use. - If you pass None, no activation is applied + Default: hyperbolic tangent (`tanh`). + If you pass `None`, no activation is applied (ie. "linear" activation: `a(x) = x`). recurrent_activation: Activation function to use for the recurrent step. + Default: hard sigmoid (`hard_sigmoid`). + If you pass `None`, no activation is applied + (ie. "linear" activation: `a(x) = x`). use_bias: Boolean, whether the layer uses a bias vector. kernel_initializer: Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs.. 
diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py b/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py index ab48a63e35..de022153f6 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent_test.py @@ -253,7 +253,7 @@ class RNNTest(test.TestCase): self.assertAllClose(y_np, y_np_2, atol=1e-4) with self.test_session(): - # test flat list inputs + # test flat list inputs. with keras.utils.CustomObjectScope(custom_objects): layer = keras.layers.RNN.from_config(config.copy()) y = layer([x, c]) @@ -262,6 +262,35 @@ class RNNTest(test.TestCase): y_np_3 = model.predict([x_np, c_np]) self.assertAllClose(y_np, y_np_3, atol=1e-4) + with self.test_session(): + # Test stacking. + cells = [keras.layers.recurrent.GRUCell(8), + RNNCellWithConstants(12), + RNNCellWithConstants(32)] + layer = keras.layers.recurrent.RNN(cells) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch( + [np.zeros((6, 5, 5)), np.zeros((6, 3))], + np.zeros((6, 32)) + ) + + with self.test_session(): + # Test stacked RNN serialization + x_np = np.random.random((6, 5, 5)) + c_np = np.random.random((6, 3)) + y_np = model.predict([x_np, c_np]) + weights = model.get_weights() + config = layer.get_config() + with keras.utils.CustomObjectScope(custom_objects): + layer = keras.layers.recurrent.RNN.from_config(config.copy()) + y = layer(x, constants=c) + model = keras.models.Model([x, c], y) + model.set_weights(weights) + y_np_2 = model.predict([x_np, c_np]) + self.assertAllClose(y_np, y_np_2, atol=1e-4) + def test_rnn_cell_with_constants_layer_passing_initial_state(self): class RNNCellWithConstants(keras.layers.Layer): diff --git a/tensorflow/python/keras/_impl/keras/layers/wrappers.py b/tensorflow/python/keras/_impl/keras/layers/wrappers.py index f053aa1d09..61f1a758e4 100644 --- 
a/tensorflow/python/keras/_impl/keras/layers/wrappers.py +++ b/tensorflow/python/keras/_impl/keras/layers/wrappers.py @@ -61,6 +61,14 @@ class Wrapper(Layer): else: return None + @property + def trainable(self): + return self.layer.trainable + + @trainable.setter + def trainable(self, value): + self.layer.trainable = value + @property def trainable_weights(self): return self.layer.trainable_weights @@ -255,7 +263,6 @@ class Bidirectional(Wrapper): """ def __init__(self, layer, merge_mode='concat', weights=None, **kwargs): - super(Bidirectional, self).__init__(layer, **kwargs) if merge_mode not in ['sum', 'mul', 'ave', 'concat', None]: raise ValueError('Invalid merge mode. ' 'Merge mode should be one of ' @@ -275,6 +282,19 @@ class Bidirectional(Wrapper): self.return_sequences = layer.return_sequences self.return_state = layer.return_state self.supports_masking = True + self._trainable = True + super(Bidirectional, self).__init__(layer, **kwargs) + self.input_spec = layer.input_spec + + @property + def trainable(self): + return self._trainable + + @trainable.setter + def trainable(self, value): + self._trainable = value + self.forward_layer.trainable = value + self.backward_layer.trainable = value def get_weights(self): return self.forward_layer.get_weights() + self.backward_layer.get_weights() @@ -305,6 +325,61 @@ class Bidirectional(Wrapper): return [output_shape] + state_shape + copy.copy(state_shape) return output_shape + def __call__(self, inputs, initial_state=None, **kwargs): + if isinstance(inputs, list): + if len(inputs) > 1: + initial_state = inputs[1:] + inputs = inputs[0] + + if initial_state is None: + return super(Bidirectional, self).__call__(inputs, **kwargs) + + # Standardize `initial_state` into list + if isinstance(initial_state, tuple): + initial_state = list(initial_state) + elif not isinstance(initial_state, list): + initial_state = [initial_state] + + # Check if `initial_state` can be splitted into half + num_states = len(initial_state) + if 
num_states % 2 > 0: + raise ValueError( + 'When passing `initial_state` to a Bidirectional RNN, the state ' + 'should be a list containing the states of the underlying RNNs. ' + 'Found: ' + str(initial_state)) + + # Applies the same workaround as in `RNN.__call__`, without handling + # constants + kwargs['initial_state'] = initial_state + additional_inputs = initial_state + additional_specs = [InputSpec(shape=K.int_shape(state)) + for state in initial_state] + self.forward_layer.state_spec = additional_specs[:num_states // 2] + self.backward_layer.state_spec = additional_specs[num_states // 2:] + + is_keras_tensor = K.is_keras_tensor(additional_inputs[0]) + for tensor in additional_inputs: + if K.is_keras_tensor(tensor) != is_keras_tensor: + raise ValueError('The initial state of a Bidirectional' + ' layer cannot be specified with a mix of' + ' Keras tensors and non-Keras tensors' + ' (a "Keras tensor" is a tensor that was' + ' returned by a Keras layer, or by `Input`)') + + if is_keras_tensor: + # Compute the full input spec, including state + full_input = [inputs] + additional_inputs + full_input_spec = self.input_spec + additional_specs + + # Perform the call with temporarily replaced input_spec + original_input_spec = self.input_spec + self.input_spec = full_input_spec + output = super(Bidirectional, self).__call__(full_input, **kwargs) + self.input_spec = original_input_spec + return output + else: + return super(Bidirectional, self).__call__(inputs, **kwargs) + def call(self, inputs, training=None, mask=None, initial_state=None): kwargs = {} if has_arg(self.layer.call, 'training'): @@ -313,11 +388,6 @@ class Bidirectional(Wrapper): kwargs['mask'] = mask if initial_state is not None and has_arg(self.layer.call, 'initial_state'): - if not isinstance(initial_state, list): - raise ValueError( - 'When passing `initial_state` to a Bidirectional RNN, the state ' - 'should be a list containing the states of the underlying RNNs. 
' - 'Found: ' + str(initial_state)) forward_state = initial_state[:len(initial_state) // 2] backward_state = initial_state[len(initial_state) // 2:] y = self.forward_layer.call(inputs, initial_state=forward_state, **kwargs) diff --git a/tensorflow/python/keras/_impl/keras/layers/wrappers_test.py b/tensorflow/python/keras/_impl/keras/layers/wrappers_test.py index f48c8919a1..c81d6b883c 100644 --- a/tensorflow/python/keras/_impl/keras/layers/wrappers_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/wrappers_test.py @@ -133,6 +133,20 @@ class TimeDistributedTest(test.TestCase): # Verify input_map has one mapping from inputs to reshaped inputs. self.assertEqual(len(td._input_map.keys()), 1) + def test_TimeDistributed_trainable(self): + # test layers that need learning_phase to be set + x = keras.layers.Input(shape=(3, 2)) + layer = keras.layers.TimeDistributed(keras.layers.BatchNormalization()) + _ = layer(x) + assert len(layer.updates) == 2 + assert len(layer.trainable_weights) == 2 + layer.trainable = False + assert not layer.updates + assert not layer.trainable_weights + layer.trainable = True + assert len(layer.updates) == 2 + assert len(layer.trainable_weights) == 2 + class BidirectionalTest(test.TestCase): @@ -338,23 +352,38 @@ class BidirectionalTest(test.TestCase): units = 3 with self.test_session(): - inputs = keras.Input((timesteps, dim)) + input1 = keras.layers.Input((timesteps, dim)) layer = keras.layers.Bidirectional( rnn(units, return_state=True, return_sequences=True)) - outputs = layer(inputs) - output, state = outputs[0], outputs[1:] + state = layer(input1)[1:] # test passing invalid initial_state: passing a tensor + input2 = keras.layers.Input((timesteps, dim)) with self.assertRaises(ValueError): output = keras.layers.Bidirectional( - rnn(units))(output, initial_state=state[0]) + rnn(units))(input2, initial_state=state[0]) # test valid usage: passing a list - output = keras.layers.Bidirectional( - rnn(units))(output, initial_state=state) - 
model = keras.Model(inputs, output) - inputs = np.random.rand(samples, timesteps, dim) - outputs = model.predict(inputs) + output = keras.layers.Bidirectional(rnn(units))(input2, + initial_state=state) + model = keras.models.Model([input1, input2], output) + assert len(model.layers) == 4 + assert isinstance(model.layers[-1].input, list) + inputs = [np.random.rand(samples, timesteps, dim), + np.random.rand(samples, timesteps, dim)] + model.predict(inputs) + + def test_Bidirectional_trainable(self): + # test layers that need learning_phase to be set + with self.test_session(): + x = keras.layers.Input(shape=(3, 2)) + layer = keras.layers.Bidirectional(keras.layers.SimpleRNN(3)) + _ = layer(x) + assert len(layer.trainable_weights) == 6 + layer.trainable = False + assert not layer.trainable_weights + layer.trainable = True + assert len(layer.trainable_weights) == 6 def _to_list(ls): diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt index db26c3e568..699208a0b9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt @@ -73,6 +73,10 @@ tf_class { name: "scope_name" mtype: "" } + member { + name: "trainable" + mtype: "" + } member { name: "trainable_variables" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt index 90c37bd986..3dde1e5769 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt @@ -122,7 +122,7 @@ tf_class { } member_method { name: "call" - argspec: "args=[\'self\', \'inputs\', \'states\'], varargs=None, keywords=kwargs, defaults=None" + argspec: "args=[\'self\', \'inputs\', \'states\', 
\'constants\'], varargs=None, keywords=kwargs, defaults=[\'None\'], " } member_method { name: "compute_mask" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt index 40aa782a02..1e176d8d4b 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt @@ -69,6 +69,10 @@ tf_class { name: "scope_name" mtype: "" } + member { + name: "trainable" + mtype: "" + } member { name: "trainable_variables" mtype: "" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt index 27a54382a4..ea3bb2f8f5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt @@ -68,6 +68,10 @@ tf_class { name: "scope_name" mtype: "" } + member { + name: "trainable" + mtype: "" + } member { name: "trainable_variables" mtype: "" -- GitLab From 1a1617e946db2b7c1acd1eafa9a47561eb68dfb5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 15:32:17 -0800 Subject: [PATCH 0079/2939] Add /learning/tfx/ to the visibility group of tensorflow/compiler/tf2xla/python. 
PiperOrigin-RevId: 185912486 --- tensorflow/compiler/tf2xla/python/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/tf2xla/python/BUILD b/tensorflow/compiler/tf2xla/python/BUILD index 49bde78039..f0a2ef0651 100644 --- a/tensorflow/compiler/tf2xla/python/BUILD +++ b/tensorflow/compiler/tf2xla/python/BUILD @@ -1,7 +1,10 @@ licenses(["notice"]) # Apache 2.0 package( - default_visibility = ["//tensorflow:internal"], + default_visibility = [ + "//learning/tfx:__subpackages__", + "//tensorflow:internal", + ], ) load( -- GitLab From c593796a4f87f308b157ed41207eee9ff7d62de7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 15:54:39 -0800 Subject: [PATCH 0080/2939] Don't spam the logs. PiperOrigin-RevId: 185916071 --- tensorflow/core/grappler/optimizers/constant_folding.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 8f89f2ae64..b8a21ea5a1 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1712,7 +1712,6 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* output) { - LOG(INFO) << "Graph before: " << item.graph.DebugString(); nodes_to_preserve_ = item.NodesToPreserve(); for (const auto& feed : item.feed) { feed_nodes_.insert(NodeName(feed.first)); @@ -1739,7 +1738,6 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, *output->mutable_library() = item.graph.library(); *output->mutable_versions() = item.graph.versions(); - LOG(INFO) << "Graph after: " << output->DebugString(); return Status::OK(); } -- GitLab From c859e8a7b7611a30730f176fc9cddcb2dd59adfb Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Thu, 15 Feb 2018 15:55:32 -0800 Subject: [PATCH 0081/2939] Fix a 
typo in model.cc error message. PiperOrigin-RevId: 185916196 --- tensorflow/contrib/lite/model.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 841e96f137..d6522fc077 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -136,7 +136,7 @@ TfLiteStatus InterpreterBuilder::BuildLocalIndexToRegistrationMapping() { } } else if (!opcode->custom_code()) { error_reporter_->Report( - "Operator with builtin_code==0 has no custom_code.\n"); + "Operator with CUSTOM builtin_code has no custom_code.\n"); status = kTfLiteError; } else { const char* name = opcode->custom_code()->c_str(); -- GitLab From e6f69c1161f24e80e71caeab6c721a98b208d5d7 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Thu, 15 Feb 2018 15:56:10 -0800 Subject: [PATCH 0082/2939] Update eager's MNIST example to inherit from `tf.keras.Model`. Also make estimator utils compatible with `tf_decorator`-wrapped functions. PiperOrigin-RevId: 185916290 --- .../eager/python/examples/mnist/mnist.py | 25 +++++++++---------- tensorflow/python/estimator/util.py | 2 ++ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/mnist/mnist.py b/tensorflow/contrib/eager/python/examples/mnist/mnist.py index ed7dbc8904..241eb23ce9 100644 --- a/tensorflow/contrib/eager/python/examples/mnist/mnist.py +++ b/tensorflow/contrib/eager/python/examples/mnist/mnist.py @@ -35,7 +35,7 @@ from tensorflow.examples.tutorials.mnist import input_data FLAGS = None -class MNISTModel(tfe.Network): +class MNISTModel(tf.keras.Model): """MNIST Network. 
Network structure is equivalent to: @@ -61,18 +61,17 @@ class MNISTModel(tfe.Network): else: assert data_format == 'channels_last' self._input_shape = [-1, 28, 28, 1] - self.conv1 = self.track_layer( - tf.layers.Conv2D(32, 5, data_format=data_format, activation=tf.nn.relu)) - self.conv2 = self.track_layer( - tf.layers.Conv2D(64, 5, data_format=data_format, activation=tf.nn.relu)) - self.fc1 = self.track_layer(tf.layers.Dense(1024, activation=tf.nn.relu)) - self.fc2 = self.track_layer(tf.layers.Dense(10)) - self.dropout = self.track_layer(tf.layers.Dropout(0.5)) - self.max_pool2d = self.track_layer( - tf.layers.MaxPooling2D( - (2, 2), (2, 2), padding='SAME', data_format=data_format)) - - def call(self, inputs, training): + self.conv1 = tf.layers.Conv2D( + 32, 5, data_format=data_format, activation=tf.nn.relu) + self.conv2 = tf.layers.Conv2D( + 64, 5, data_format=data_format, activation=tf.nn.relu) + self.fc1 = tf.layers.Dense(1024, activation=tf.nn.relu) + self.fc2 = tf.layers.Dense(10) + self.dropout = tf.layers.Dropout(0.5) + self.max_pool2d = tf.layers.MaxPooling2D( + (2, 2), (2, 2), padding='SAME', data_format=data_format) + + def call(self, inputs, training=False): """Computes labels from inputs. Users should invoke __call__ to run the network, which delegates to this diff --git a/tensorflow/python/estimator/util.py b/tensorflow/python/estimator/util.py index b7ba76d871..3ce8eea84b 100644 --- a/tensorflow/python/estimator/util.py +++ b/tensorflow/python/estimator/util.py @@ -21,10 +21,12 @@ from __future__ import print_function import functools +from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect def _is_bounded_method(fn): + _, fn = tf_decorator.unwrap(fn) return tf_inspect.ismethod(fn) and (fn.__self__ is not None) -- GitLab From f5b30312013df5b7bd3a50555b2facadd4aed204 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 15 Feb 2018 16:23:43 -0800 Subject: [PATCH 0083/2939] K-FAC: Support for embedding layers, add FisherFactor.{multiply, multiply_inverse}. PiperOrigin-RevId: 185920837 --- .../python/kernel_tests/fisher_blocks_test.py | 64 ++- .../kernel_tests/fisher_factors_test.py | 42 +- .../contrib/kfac/python/ops/fisher_blocks.py | 144 +++++-- .../kfac/python/ops/fisher_blocks_lib.py | 5 +- .../contrib/kfac/python/ops/fisher_factors.py | 387 +++++++++++++++--- .../kfac/python/ops/fisher_factors_lib.py | 29 +- .../kfac/python/ops/layer_collection.py | 52 +++ tensorflow/contrib/kfac/python/ops/utils.py | 61 ++- .../contrib/kfac/python/ops/utils_lib.py | 2 + 9 files changed, 662 insertions(+), 124 deletions(-) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py index 82accd57f0..fb4b3a241c 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py @@ -26,6 +26,7 @@ from tensorflow.contrib.kfac.python.ops import utils from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops +from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import state_ops @@ -236,10 +237,10 @@ class NaiveDiagonalFBTest(test.TestCase): self.assertAllClose(output_flat, explicit) -class FullyConnectedDiagonalFB(test.TestCase): +class FullyConnectedDiagonalFBTest(test.TestCase): def setUp(self): - super(FullyConnectedDiagonalFB, self).setUp() + super(FullyConnectedDiagonalFBTest, self).setUp() self.batch_size = 4 self.input_size = 6 @@ -375,6 +376,65 @@ class FullyConnectedDiagonalFB(test.TestCase): return multiply_result, multiply_inverse_result +class EmbeddingKFACFBTest(test.TestCase): + + def 
testInstantiateFactors(self): + with ops.Graph().as_default(): + random_seed.set_random_seed(200) + + # Create a Fisher Block. + vocab_size = 5 + block = fb.EmbeddingKFACFB(lc.LayerCollection(), vocab_size) + + # Add some examples. + inputs = array_ops.constant([[0, 1], [1, 2], [2, 3]]) + outputs = array_ops.constant([[0.], [1.], [2.]]) + block.register_additional_minibatch(inputs, outputs) + + # Instantiate factor's variables. Ensure it doesn't fail. + grads = outputs**2. + damping = array_ops.constant(0.) + block.instantiate_factors(([grads],), damping) + + def testMultiplyInverse(self): + with ops.Graph().as_default(), self.test_session() as sess: + random_seed.set_random_seed(200) + + # Create a Fisher Block. + vocab_size = 5 + block = fb.EmbeddingKFACFB(lc.LayerCollection(), vocab_size) + + # Add some examples. + inputs = array_ops.constant([[0, 1], [1, 2], [2, 3]]) + outputs = array_ops.constant([[0.], [1.], [2.]]) + block.register_additional_minibatch(inputs, outputs) + + # Instantiate factor's variables. Ensure it doesn't fail. + grads = outputs**2. + damping = array_ops.constant(0.) + block.instantiate_factors(([grads],), damping) + + # Create a sparse update. + indices = array_ops.constant([1, 3, 4]) + values = array_ops.constant([[1.], [1.], [1.]]) + sparse_vector = ops.IndexedSlices( + values, indices, dense_shape=[vocab_size, 1]) + dense_vector = array_ops.reshape([0., 1., 0., 1., 1.], [vocab_size, 1]) + + # Compare Fisher-vector product against explicit result. 
+ result = block.multiply_inverse(sparse_vector) + expected_result = linalg_ops.matrix_solve(block.full_fisher_block(), + dense_vector) + + sess.run(tf_variables.global_variables_initializer()) + self.assertAlmostEqual( + sess.run(expected_result[1]), sess.run(result.values[0])) + self.assertAlmostEqual( + sess.run(expected_result[3]), sess.run(result.values[1])) + self.assertAlmostEqual( + sess.run(expected_result[4]), sess.run(result.values[2])) + + class FullyConnectedKFACBasicFBTest(test.TestCase): def testFullyConnectedKFACBasicFBInit(self): diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py index 753378d9f4..66e18974ab 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py @@ -89,6 +89,21 @@ class FisherFactorTestingDummy(ff.FisherFactor): def make_inverse_update_ops(self): return [] + def get_cov(self): + return NotImplementedError + + def left_multiply(self, x, damping): + return NotImplementedError + + def right_multiply(self, x, damping): + return NotImplementedError + + def left_multiply_inverse(self, x, damping): + return NotImplementedError + + def right_multiply_inverse(self, x, damping): + return NotImplementedError + class InverseProvidingFactorTestingDummy(ff.InverseProvidingFactor): """Dummy class to test the non-abstract methods on ff.InverseProvidingFactor. 
@@ -379,7 +394,7 @@ class NaiveDiagonalFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), name='a/b/c') factor = ff.NaiveDiagonalFactor((tensor,), 32) - self.assertEqual([6, 1], factor.get_cov().get_shape().as_list()) + self.assertEqual([6, 1], factor.get_cov_var().get_shape().as_list()) def testNaiveDiagonalFactorInitFloat64(self): with tf_ops.Graph().as_default(): @@ -387,7 +402,7 @@ class NaiveDiagonalFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') factor = ff.NaiveDiagonalFactor((tensor,), 32) - cov = factor.get_cov() + cov = factor.get_cov_var() self.assertEqual(cov.dtype, dtype) self.assertEqual([6, 1], cov.get_shape().as_list()) @@ -402,6 +417,29 @@ class NaiveDiagonalFactorTest(test.TestCase): self.assertAllClose([[0.75], [1.5]], new_cov) +class EmbeddingInputKroneckerFactorTest(test.TestCase): + + def testInitialization(self): + with tf_ops.Graph().as_default(): + input_ids = array_ops.constant([[0], [1], [4]]) + vocab_size = 5 + factor = ff.EmbeddingInputKroneckerFactor((input_ids,), vocab_size) + cov = factor.get_cov_var() + self.assertEqual(cov.shape.as_list(), [vocab_size]) + + def testCovarianceUpdateOp(self): + with tf_ops.Graph().as_default(): + input_ids = array_ops.constant([[0], [1], [4]]) + vocab_size = 5 + factor = ff.EmbeddingInputKroneckerFactor((input_ids,), vocab_size) + cov_update_op = factor.make_covariance_update_op(0.0) + + with self.test_session() as sess: + sess.run(tf_variables.global_variables_initializer()) + new_cov = sess.run(cov_update_op) + self.assertAllClose(np.array([1., 1., 0., 0., 1.]) / 3., new_cov) + + class FullyConnectedKroneckerFactorTest(test.TestCase): def _testFullyConnectedKroneckerFactorInit(self, diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py index 0d2fa706f5..cf38d28b43 100644 --- 
a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py @@ -92,10 +92,22 @@ def compute_pi_tracenorm(left_cov, right_cov): Returns: The computed scalar constant pi for these Kronecker Factors (as a Tensor). """ + + def _trace(cov): + if len(cov.shape) == 1: + # Diagonal matrix. + return math_ops.reduce_sum(cov) + elif len(cov.shape) == 2: + # Full matrix. + return math_ops.trace(cov) + else: + raise ValueError( + "What's the trace of a Tensor of rank %d?" % len(cov.shape)) + # Instead of dividing by the dim of the norm, we multiply by the dim of the # other norm. This works out the same in the ratio. - left_norm = math_ops.trace(left_cov) * right_cov.shape.as_list()[0] - right_norm = math_ops.trace(right_cov) * left_cov.shape.as_list()[0] + left_norm = _trace(left_cov) * right_cov.shape.as_list()[0] + right_norm = _trace(right_cov) * left_cov.shape.as_list()[0] return math_ops.sqrt(left_norm / right_norm) @@ -201,15 +213,15 @@ class FullFB(FisherBlock): self._factor.register_damped_inverse(damping) def multiply_inverse(self, vector): - inverse = self._factor.get_damped_inverse(self._damping) - out_flat = math_ops.matmul(inverse, utils.tensors_to_column(vector)) + vector_flat = utils.tensors_to_column(vector) + out_flat = self._factor.left_multiply_inverse( + vector_flat, self._damping) return utils.column_to_tensors(vector, out_flat) def multiply(self, vector): vector_flat = utils.tensors_to_column(vector) - out_flat = ( - math_ops.matmul(self._factor.get_cov(), vector_flat) + - self._damping * vector_flat) + out_flat = self._factor.left_multiply( + vector_flat, self._damping) return utils.column_to_tensors(vector, out_flat) def full_fisher_block(self): @@ -265,16 +277,20 @@ class NaiveDiagonalFB(FisherBlock): def multiply_inverse(self, vector): vector_flat = utils.tensors_to_column(vector) - out_flat = vector_flat / (self._factor.get_cov() + self._damping) + print("vector_flat: %s" % vector_flat) + out_flat = 
self._factor.left_multiply_inverse( + vector_flat, self._damping) + print("out_flat: %s" % out_flat) return utils.column_to_tensors(vector, out_flat) def multiply(self, vector): vector_flat = utils.tensors_to_column(vector) - out_flat = vector_flat * (self._factor.get_cov() + self._damping) + out_flat = self._factor.left_multiply( + vector_flat, self._damping) return utils.column_to_tensors(vector, out_flat) def full_fisher_block(self): - return array_ops.diag(array_ops.reshape(self._factor.get_cov(), (-1,))) + return self._factor.get_cov() def tensors_to_compute_grads(self): return self._params @@ -356,8 +372,9 @@ class FullyConnectedDiagonalFB(FisherBlock): Tensor of the same shape, corresponding to the inverse Fisher-vector product. """ - reshaped_vect = utils.layer_params_to_mat2d(vector) - reshaped_out = reshaped_vect / (self._factor.get_cov() + self._damping) + reshaped_vec = utils.layer_params_to_mat2d(vector) + reshaped_out = self._factor.left_multiply_inverse( + reshaped_vec, self._damping) return utils.mat2d_to_layer_params(vector, reshaped_out) def multiply(self, vector): @@ -372,8 +389,9 @@ class FullyConnectedDiagonalFB(FisherBlock): Returns: Tensor of the same shape, corresponding to the Fisher-vector product. 
""" - reshaped_vect = utils.layer_params_to_mat2d(vector) - reshaped_out = reshaped_vect * (self._factor.get_cov() + self._damping) + reshaped_vec = utils.layer_params_to_mat2d(vector) + reshaped_out = self._factor.left_multiply( + reshaped_vec, self._damping) return utils.mat2d_to_layer_params(vector, reshaped_out) def tensors_to_compute_grads(self): @@ -468,12 +486,14 @@ class ConvDiagonalFB(FisherBlock): def multiply_inverse(self, vector): reshaped_vect = utils.layer_params_to_mat2d(vector) - reshaped_out = reshaped_vect / (self._factor.get_cov() + self._damping) + reshaped_out = self._factor.left_multiply_inverse( + reshaped_vect, self._damping) return utils.mat2d_to_layer_params(vector, reshaped_out) def multiply(self, vector): reshaped_vect = utils.layer_params_to_mat2d(vector) - reshaped_out = reshaped_vect * (self._factor.get_cov() + self._damping) + reshaped_out = self._factor.left_multiply( + reshaped_vect, self._damping) return utils.mat2d_to_layer_params(vector, reshaped_out) def tensors_to_compute_grads(self): @@ -533,28 +553,24 @@ class KroneckerProductFB(FisherBlock): return 1.0 def multiply_inverse(self, vector): - left_factor_inv = self._input_factor.get_damped_inverse(self._input_damping) - right_factor_inv = self._output_factor.get_damped_inverse( - self._output_damping) reshaped_vector = utils.layer_params_to_mat2d(vector) - reshaped_out = math_ops.matmul(left_factor_inv, - math_ops.matmul(reshaped_vector, - right_factor_inv)) + reshaped_out = self._output_factor.right_multiply_inverse( + reshaped_vector, + self._output_damping) + reshaped_out = self._input_factor.left_multiply_inverse( + reshaped_out, self._input_damping) if self._renorm_coeff != 1.0: reshaped_out /= math_ops.cast( self._renorm_coeff, dtype=reshaped_out.dtype) return utils.mat2d_to_layer_params(vector, reshaped_out) def multiply(self, vector): - left_factor = self._input_factor.get_cov() - right_factor = self._output_factor.get_cov() reshaped_vector = 
utils.layer_params_to_mat2d(vector) - reshaped_out = ( - math_ops.matmul(reshaped_vector, right_factor) + - self._output_damping * reshaped_vector) - reshaped_out = ( - math_ops.matmul(left_factor, reshaped_out) + - self._input_damping * reshaped_out) + reshaped_out = self._output_factor.right_multiply( + reshaped_vector, + self._output_damping) + reshaped_out = self._input_factor.left_multiply( + reshaped_out, self._input_damping) if self._renorm_coeff != 1.0: reshaped_out *= math_ops.cast( self._renorm_coeff, dtype=reshaped_out.dtype) @@ -574,6 +590,74 @@ class KroneckerProductFB(FisherBlock): right_factor) +class EmbeddingKFACFB(KroneckerProductFB): + """K-FAC FisherBlock for embedding layers. + + This FisherBlock is similar to EmbeddingKFACFB, except that its + input factor is approximated by a diagonal matrix. In the case that each + example references exactly one embedding, this approximation is exact. + + Does not support bias parameters. + """ + + def __init__(self, layer_collection, vocab_size): + """Creates a EmbeddingKFACFB block. + + Args: + layer_collection: The collection of all layers in the K-FAC approximate + Fisher information matrix to which this FisherBlock belongs. + vocab_size: int. Size of vocabulary for this embedding layer. + """ + self._inputs = [] + self._outputs = [] + self._vocab_size = vocab_size + + super(EmbeddingKFACFB, self).__init__(layer_collection) + + def instantiate_factors(self, grads_list, damping): + """Instantiate Kronecker Factors for this FisherBlock. + + Args: + grads_list: List of list of Tensors. grads_list[i][j] is the + gradient of the loss with respect to 'outputs' from source 'i' and + tower 'j'. Each Tensor has shape [tower_minibatch_size, output_size]. + damping: 0-D Tensor or float. 'damping' * identity is approximately added + to this FisherBlock's Fisher approximation. 
+ """ + # TODO(b/68033310): Validate which of, + # (1) summing on a single device (as below), or + # (2) on each device in isolation and aggregating + # is faster. + inputs = _concat_along_batch_dim(self._inputs) + grads_list = tuple(_concat_along_batch_dim(grads) for grads in grads_list) + + self._input_factor = self._layer_collection.make_or_get_factor( # + fisher_factors.EmbeddingInputKroneckerFactor, # + ((inputs,), self._vocab_size)) + self._output_factor = self._layer_collection.make_or_get_factor( # + fisher_factors.FullyConnectedKroneckerFactor, # + (grads_list,)) + self._register_damped_input_and_output_inverses(damping) + + def tensors_to_compute_grads(self): + return self._outputs + + def register_additional_minibatch(self, inputs, outputs): + """Registers an additional minibatch to the FisherBlock. + + Args: + inputs: Tensor of shape [batch_size, input_size]. Inputs to the + matrix-multiply. + outputs: Tensor of shape [batch_size, output_size]. Layer preactivations. + """ + self._inputs.append(inputs) + self._outputs.append(outputs) + + @property + def num_registered_minibatches(self): + return len(self._inputs) + + class FullyConnectedKFACBasicFB(KroneckerProductFB): """K-FAC FisherBlock for fully-connected (dense) layers. 
diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py index ac39630920..c04cf727fa 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks_lib.py @@ -29,6 +29,7 @@ _allowed_symbols = [ 'NaiveDiagonalFB', 'FullyConnectedDiagonalFB', 'KroneckerProductFB', + 'EmbeddingKFACFB', 'FullyConnectedKFACBasicFB', 'ConvKFCBasicFB', 'ConvDiagonalFB', @@ -36,7 +37,9 @@ _allowed_symbols = [ 'compute_pi_tracenorm', 'compute_pi_adjusted_damping', 'num_conv_locations', - 'normalize_damping' + 'normalize_damping', + 'LEFT_MULTIPLY', + 'RIGHT_MULTIPLY', ] remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py index bcba18ae14..603d8b8b21 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py @@ -25,13 +25,13 @@ import numpy as np import six from tensorflow.contrib.kfac.python.ops import utils +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops as tf_ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn from tensorflow.python.ops import special_math_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables @@ -112,54 +112,6 @@ def diagonal_covariance_initializer(shape, dtype, partition_info): # pylint: di return array_ops.ones(shape, dtype) -def extract_image_patches(image, ksizes, strides, padding, name=None): - """Extracts image patches for an N-dimensional convolution. 
- - This function is a compatibility wrapper over tf.extract_image_patches(), as - ExtractImagePatches isn't yet implemented in XLA. - - Args: - image: Tensor of shape [batch, in_x, in_y, ..., in_channels]. Input images. - All dimensions except 'batch' must be defined. - ksizes: [filter_x, filter_y, ...]. Spatial shape of filter in each - dimension. - strides: [stride_x, stride_y, ...]. Spatial stride for filter in each - dimension. - padding: str. "VALID" or "SAME". - name: str or None. name of Op. - - Returns: - result: [batch, out_x, out_y, ..., filter_x, filter_y, ..., in_channels]. - Contains image patches to which conv kernel would be applied for each - output location. [out_x, out_y, ...] depends on padding. - """ - if not utils.on_tpu(): - return array_ops.extract_image_patches( - image, - ksizes=([1] + list(ksizes) + [1]), - strides=([1] + list(strides) + [1]), - rates=[1, 1, 1, 1], - padding=padding, - name=name) - - with tf_ops.name_scope(name, "extract_image_patches", - [image, ksizes, strides, padding]): - batch = image.shape.as_list()[0] - in_channels = image.shape.as_list()[-1] - - # Map each input feature to a location in the output. - out_channels = np.prod(ksizes) * in_channels - filters = linalg_ops.eye(out_channels), - filters = array_ops.reshape(filters, ksizes + [in_channels, out_channels]) - - result = nn.convolution(image, filters, padding, strides=strides) - out_spatial = result.shape.as_list()[1:-1] - result = array_ops.reshape( - result, [batch or -1] + out_spatial + ksizes + [in_channels]) - - return result - - def compute_cov(tensor, tensor_right=None, normalizer=None): """Compute the empirical second moment of the rows of a 2D Tensor. @@ -259,12 +211,21 @@ def scalar_or_tensor_to_string(val): class FisherFactor(object): """Base class for objects modeling factors of approximate Fisher blocks. 
- Note that for blocks that aren't based on approximations, a 'factor' can - be the entire block itself, as is the case for the diagonal and full - representations. + A FisherFactor represents part of an approximate Fisher Information matrix. + For example, one approximation to the Fisher uses the Kronecker product of two + FisherFactors A and B, F = kron(A, B). FisherFactors are composed with + FisherBlocks to construct a block-diagonal approximation to the full Fisher. + + FisherFactors are backed by a single, non-trainable variable that is updated + by running FisherFactor.make_covariance_update_op(). The shape and type of + this variable is implementation specific. - Subclasses must implement the _compute_new_cov method, and the _var_scope - and _cov_shape properties. + Note that for blocks that aren't based on approximations, a 'factor' can + be the entire block itself, as is the case for the diagonal and full + representations. + + Subclasses must implement the _compute_new_cov() method, and the _var_scope + and _cov_shape properties. """ def __init__(self): @@ -272,16 +233,21 @@ class FisherFactor(object): @abc.abstractproperty def _var_scope(self): + """Variable scope for this FisherFactor instance. + + Returns: + string that unique identifies this FisherFactor instance. + """ pass @abc.abstractproperty def _cov_shape(self): - """The shape of the cov matrix.""" + """The shape of the variable backing this FisherFactor.""" pass @abc.abstractproperty def _num_sources(self): - """The number of things to sum over when computing cov. + """The number of things to sum over when updating covariance variable. The default make_covariance_update_op function will call _compute_new_cov with indices ranging from 0 to _num_sources-1. 
The typical situation is @@ -293,10 +259,12 @@ class FisherFactor(object): @abc.abstractproperty def _dtype(self): + """dtype for variable backing this factor.""" pass @property def _cov_initializer(self): + """Function for initializing covariance variable.""" return covariance_initializer def instantiate_covariance(self): @@ -311,6 +279,15 @@ class FisherFactor(object): @abc.abstractmethod def _compute_new_cov(self, idx=0): + """Computes minibatch-estimated covariance for a single source. + + Args: + idx: int in [0, self._num_sources). Which source to use when estimating + covariance. + + Returns: + Tensor of same shape as self.get_cov_var(). + """ pass def make_covariance_update_op(self, ema_decay): @@ -343,14 +320,101 @@ class FisherFactor(object): """Create and return update ops corresponding to registered computations.""" pass + @abc.abstractmethod def get_cov(self): + """Get full covariance matrix. + + Returns: + Tensor of shape [n, n]. Represents all parameter-parameter correlations + captured by this FisherFactor. + """ + pass + + def get_cov_var(self): + """Get variable backing this FisherFactor. + + May or may not be the same as self.get_cov() + + Returns: + Variable of shape self._cov_shape. + """ return self._cov + @abc.abstractmethod + def left_multiply(self, x, damping): + """Multiplies 'x' by the damped covariance of this factor. + + Let C be the covariance matrix this factor represents, and + D = C + damping * I be its damped variant. This method calculates + matmul(D, vec(x)). + + Args: + x: Tensor. Represents a single vector. Shape depends on implementation. + damping: 0-D Tensor. Damping to add to C's diagonal. + + Returns: + Tensor of same shape as 'x'. + """ + pass + + @abc.abstractmethod + def right_multiply(self, x, damping): + """Multiplies 'x' by the damped covariance of this factor. + + Let C be the covariance matrix this factor represents, and + D = C + damping * I be its damped variant. This method calculates + matmul(vec(x), D). 
+ + Args: + x: Tensor. Represents a single vector. Shape depends on implementation. + damping: 0-D Tensor. Damping to add to C's diagonal. + + Returns: + Tensor of same shape as 'x'. + """ + pass + + @abc.abstractmethod + def left_multiply_inverse(self, x, damping): + """Multiplies 'x' by damped inverse of this factor. + + Let C be the covariance matrix this factor represents and + E = inv(C + damping * I) be its damped inverse. This method calculates + matmul(E, vec(x)). + + Args: + x: Tensor. Represents a single vector. Shape depends on implementation. + damping: 0-D Tensor. Damping to add to C's diagonal. + + Returns: + Tensor of same shape as 'x'. + """ + pass + + @abc.abstractmethod + def right_multiply_inverse(self, x, damping): + """Multiplies 'x' by damped inverse of this factor. + + Let C be the covariance matrix this factor represents and + E = inv(C + damping * I) be its damped inverse. This method calculates + matmul(vec(x), E). + + Args: + x: Tensor. Represents a single vector. Shape depends on implementation. + damping: 0-D Tensor. Damping to add to C's diagonal. + + Returns: + Tensor of same shape as 'x'. + """ + pass + class InverseProvidingFactor(FisherFactor): - """Base class for FisherFactors that maintain inverses, powers, etc of _cov. + """Base class for FisherFactors that maintain inverses explicitly. - Assumes that the _cov property is a square PSD matrix. + This class explicitly calculates and stores inverses of covariance matrices + provided by the underlying FisherFactor implementation. It is assumed that + vectors can be represented as 2-D matrices. Subclasses must implement the _compute_new_cov method, and the _var_scope and _cov_shape properties. @@ -485,6 +549,61 @@ class InverseProvidingFactor(FisherFactor): def reset_eigendecomp(self): self._eigendecomp = None + def get_cov(self): + # Variable contains full covariance matrix. 
+ return self.get_cov_var() + + def left_multiply(self, x, damping): + n = self.get_cov().shape[0] + damped_cov = self.get_cov() + damping * array_ops.eye(n) + + if isinstance(x, tf_ops.IndexedSlices): + raise NotImplementedError( + "Left-multiply not yet supported for IndexedSlices.") + + if len(x.shape) != 2: + raise ValueError( + "InverseProvidingFactors apply to matrix-shaped vectors. Found: %s." + % (x,)) + + return math_ops.matmul(damped_cov, x) + + def right_multiply(self, x, damping): + n = self.get_cov().shape[0] + damped_cov = self.get_cov() + damping * array_ops.eye(n) + + if isinstance(x, tf_ops.IndexedSlices): + return utils.matmul_sparse_dense(x, damped_cov) + + if len(x.shape) != 2: + raise ValueError( + "InverseProvidingFactors apply to matrix-shaped vectors. Found: %s." + % (x,)) + + return math_ops.matmul(x, damped_cov) + + def left_multiply_inverse(self, x, damping): + if isinstance(x, tf_ops.IndexedSlices): + raise ValueError("Left-multiply not yet supported for IndexedSlices.") + + if x.shape.ndims != 2: + raise ValueError( + "InverseProvidingFactors apply to matrix-shaped vectors. Found: %s." + % (x,)) + + return math_ops.matmul(self.get_damped_inverse(damping), x) + + def right_multiply_inverse(self, x, damping): + if isinstance(x, tf_ops.IndexedSlices): + return utils.matmul_sparse_dense(x, self.get_damped_inverse(damping)) + + if x.shape.ndims != 2: + raise ValueError( + "InverseProvidingFactors apply to matrix-shaped vectors. Found: %s." + % (x,)) + + return math_ops.matmul(x, self.get_damped_inverse(damping)) + class FullFactor(InverseProvidingFactor): """FisherFactor for a full matrix representation of the Fisher of a parameter. @@ -530,7 +649,11 @@ class FullFactor(InverseProvidingFactor): class DiagonalFactor(FisherFactor): - """A base class for FisherFactors that use diagonal approximations.""" + """A base class for FisherFactors that use diagonal approximations. 
+ + A DiagonalFactor's covariance variable can be of any shape, but must contain + exactly one entry per parameter. + """ def __init__(self): super(DiagonalFactor, self).__init__() @@ -542,6 +665,45 @@ class DiagonalFactor(FisherFactor): def make_inverse_update_ops(self): return [] + def get_cov(self): + # self.get_cov() could be any shape, but it must have one entry per + # parameter. Flatten it into a vector. + cov_diag_vec = array_ops.reshape(self.get_cov_var(), [-1]) + return array_ops.diag(cov_diag_vec) + + def left_multiply(self, x, damping): + damped_cov = self.get_cov_var() + damping + if isinstance(x, tf_ops.IndexedSlices): + return utils.matmul_diag_sparse(array_ops.reshape(damped_cov, [-1]), x) + + if x.shape != damped_cov.shape: + raise ValueError("x (%s) and cov (%s) must have same shape." % + (x, damped_cov)) + + return damped_cov * x + + def right_multiply(self, x, damping): + raise NotImplementedError("Only left-multiply is currently supported.") + + def left_multiply_inverse(self, x, damping): + inverse = 1. / (self.get_cov_var() + damping) + + if isinstance(x, tf_ops.IndexedSlices): + return utils.matmul_diag_sparse(array_ops.reshape(inverse, [-1]), x) + + if x.shape != inverse.shape: + raise ValueError("x (%s) and cov (%s) must have same shape." % + (x, inverse)) + + return inverse * x + + def right_multiply_inverse(self, x, damping): + raise NotImplementedError("Only left-multiply is currently supported.") + + def register_damped_inverse(self, damping): + # DiagonalFactors don't keep explicit inverses. + pass + class NaiveDiagonalFactor(DiagonalFactor): """FisherFactor for a diagonal approximation of any type of param's Fisher. @@ -553,6 +715,14 @@ class NaiveDiagonalFactor(DiagonalFactor): def __init__(self, params_grads, batch_size): + """Initializes NaiveDiagonalFactor instance. + + Args: + params_grads: Sequence of Tensors, each with same shape as parameters this + FisherFactor corresponds to. 
For example, the gradient of the loss with + respect to parameters. + batch_size: int or 0-D Tensor. Size + """ self._params_grads = tuple(utils.ensure_sequence(params_grad) for params_grad in params_grads) self._batch_size = batch_size @@ -567,7 +737,7 @@ class NaiveDiagonalFactor(DiagonalFactor): def _cov_shape(self): size = sum(param_grad.shape.num_elements() for param_grad in self._params_grads[0]) - return (size, 1) + return [size, 1] @property def _num_sources(self): @@ -584,6 +754,84 @@ class NaiveDiagonalFactor(DiagonalFactor): self._batch_size, params_grads_flat.dtype)) +class EmbeddingInputKroneckerFactor(DiagonalFactor): + r"""FisherFactor for input to an embedding layer. + + Given input_ids = [batch_size, input_size] representing indices into an + [vocab_size, embedding_size] embedding matrix, approximate input covariance by + a diagonal matrix, + + Cov(input_ids, input_ids) = + (1/batch_size) sum_{i} diag(n_hot(input[i]) ** 2). + + where n_hot() constructs an n-hot binary vector and diag() constructs a + diagonal matrix of size [vocab_size, vocab_size]. + """ + + def __init__(self, input_ids, vocab_size, dtype=None): + """Instantiate EmbeddingInputKroneckerFactor. + + Args: + input_ids: Tuple of Tensors of shape [batch_size, input_size] and dtype + int32. Indices into embedding matrix. + vocab_size: int or 0-D Tensor. Maximum value for entries in 'input_ids'. + dtype: dtype for covariance statistics. Must be a floating point type. + Defaults to float32. 
+ """ + self._input_ids = input_ids + self._vocab_size = vocab_size + self._cov_dtype = dtype or dtypes.float32 + + super(EmbeddingInputKroneckerFactor, self).__init__() + + @property + def _var_scope(self): + return "ff_diag_embedding/" + scope_string_from_params(self._input_ids) + + @property + def _cov_shape(self): + return [self._vocab_size] + + @property + def _num_sources(self): + return len(self._input_ids) + + @property + def _dtype(self): + return self._cov_dtype + + def _compute_new_cov(self, idx=0): + with maybe_colocate_with(self._input_ids): + input_ids = self._input_ids[idx] + if len(input_ids.shape) > 2: + raise ValueError( + "Input to embeddings must have rank <= 2. Found rank %d." % len( + input_ids.shape)) + + batch_size = array_ops.shape(input_ids)[0] + + # Transform indices into one-hot vectors. + # + # TODO(b/72714822): There must be a faster way to construct the diagonal + # covariance matrix! This operation is O(batch_size * vocab_size), where + # it should be O(batch_size * input_size). + flat_input_ids = array_ops.reshape(input_ids, [-1]) + one_hots = array_ops.one_hot(flat_input_ids, + self._vocab_size) # [?, vocab_size] + + # Take average across examples. Note that, because all entries have + # magnitude zero or one, there's no need to square the entries. + # + # TODO(b/72714822): Support for SparseTensor, other kinds of aggregation + # within an example such as average. + # + # TODO(b/72714822): Support for partitioned embeddings. + new_cov = math_ops.reduce_sum(one_hots, axis=0) # [vocab_size] + new_cov /= math_ops.cast(batch_size, new_cov.dtype) + + return new_cov + + class FullyConnectedDiagonalFactor(DiagonalFactor): r"""FisherFactor for a diagonal approx of a fully-connected layer's Fisher. 
@@ -623,8 +871,9 @@ class FullyConnectedDiagonalFactor(DiagonalFactor): @property def _cov_shape(self): - return [self._inputs.shape[1] + self._has_bias, - self._outputs_grads[0].shape[1]] + input_size = self._inputs.shape[1] + self._has_bias + output_size = self._outputs_grads[0].shape[1] + return [input_size, output_size] @property def _num_sources(self): @@ -717,10 +966,11 @@ class ConvDiagonalFactor(DiagonalFactor): # TODO(b/64144716): there is potential here for a big savings in terms # of memory use. - patches = extract_image_patches( + patches = array_ops.extract_image_patches( self._inputs, - ksizes=[filter_height, filter_width], - strides=self._strides[1:-1], + ksizes=[1, filter_height, filter_width, 1], + strides=self._strides, + rates=[1, 1, 1, 1], padding=self._padding) if self._has_bias: @@ -864,10 +1114,11 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): # TODO(b/64144716): there is potential here for a big savings in terms of # memory use. - patches = extract_image_patches( + patches = array_ops.extract_image_patches( self._inputs, - ksizes=[filter_height, filter_width], - strides=self._strides[1:-1], + ksizes=[1, filter_height, filter_width, 1], + strides=self._strides, + rates=[1, 1, 1, 1], padding=self._padding) flatten_size = (filter_height * filter_width * in_channels) diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py b/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py index ad93919149..2d8e378a93 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_factors_lib.py @@ -24,26 +24,15 @@ from tensorflow.python.util.all_util import remove_undocumented # pylint: enable=unused-import,line-too-long,wildcard-import _allowed_symbols = [ - "inverse_initializer", - "covariance_initializer", - "diagonal_covariance_initializer", - "scope_string_from_params", - "scope_string_from_name", - "scalar_or_tensor_to_string", - "FisherFactor", - 
"InverseProvidingFactor", - "FullFactor", - "DiagonalFactor", - "NaiveDiagonalFactor", - "FullyConnectedDiagonalFactor", - "FullyConnectedKroneckerFactor", - "ConvInputKroneckerFactor", - "ConvOutputKroneckerFactor", - "ConvDiagonalFactor", - "set_global_constants", - "maybe_colocate_with", - "compute_cov", - "append_homog" + "inverse_initializer", "covariance_initializer", + "diagonal_covariance_initializer", "scope_string_from_params", + "scope_string_from_name", "scalar_or_tensor_to_string", "FisherFactor", + "InverseProvidingFactor", "FullFactor", "DiagonalFactor", + "NaiveDiagonalFactor", "EmbeddingInputKroneckerFactor", + "FullyConnectedDiagonalFactor", "FullyConnectedKroneckerFactor", + "ConvInputKroneckerFactor", "ConvOutputKroneckerFactor", + "ConvDiagonalFactor", "set_global_constants", "maybe_colocate_with", + "compute_cov", "append_homog" ] remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py index 8d450f04f3..ce9005b9ce 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py @@ -143,6 +143,7 @@ class LayerCollection(object): self._loss_dict = {} # {str: LossFunction} self._subgraph = None self._default_generic_approximation = APPROX_FULL_NAME + self._default_embedding_approximation = APPROX_KRONECKER_NAME self._default_fully_connected_approximation = APPROX_KRONECKER_NAME self._default_convolution_2d_approximation = APPROX_KRONECKER_NAME self._default_fully_connected_multi_approximation = ( @@ -178,6 +179,17 @@ class LayerCollection(object): """ return self._linked_parameters + @property + def default_embedding_approximation(self): + return self._default_embedding_approximation + + def set_default_embedding_approximation(self, value): + if value != APPROX_KRONECKER_NAME: + raise ValueError( + "{} is not a valid approximation for embedding 
variables.".format( + value)) + self._default_embedding_approximation = value + @property def default_generic_approximation(self): return self._default_generic_approximation @@ -417,6 +429,46 @@ class LayerCollection(object): else: return None + def register_embedding(self, + params, + inputs, + outputs, + approx=None, + reuse=VARIABLE_SCOPE): + """Registers an embedding layer. + + Args: + params: Embedding matrix of shape [vocab_size, embedding_size]. + inputs: Tensor of shape [batch_size, input_size] and dtype int32. Indices + into embedding matrix. + outputs: Tensor of shape [batch_size, output_size]. Outputs + produced by layer. + approx: str. Must be "kron". + reuse: bool or str. If True, reuse an existing FisherBlock. If False, + create a new FisherBlock. If "VARIABLE_SCOPE", use + tf.get_variable_scope().reuse. + + Raises: + ValueError: For improper value to 'approx'. + KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: If reuse == True and FisherBlock found but of the wrong type.
+ """ + if approx is None: + approx = self._get_linked_approx(params) + if approx is None: + approx = self.default_embedding_approximation + + if approx != APPROX_KRONECKER_NAME: + raise ValueError("Bad value {} for approx.".format(approx)) + + if isinstance(params, (tuple, list)): + raise ValueError("Bias not supported.") + + vocab_size = int(params.shape[0]) + block = self.register_block( + params, fb.EmbeddingKFACFB(self, vocab_size), reuse=reuse) + block.register_additional_minibatch(inputs, outputs) + def register_fully_connected(self, params, inputs, diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py index e89508fa46..f5bd97cb4e 100644 --- a/tensorflow/contrib/kfac/python/ops/utils.py +++ b/tensorflow/contrib/kfac/python/ops/utils.py @@ -144,7 +144,9 @@ def layer_params_to_mat2d(vector): [-1, w_part.shape.as_list()[-1]]) return array_ops.concat( (w_part_reshaped, array_ops.reshape(b_part, [1, -1])), axis=0) - else: + elif isinstance(vector, ops.IndexedSlices): + return vector + else: # Tensor or Tensor-like. return array_ops.reshape(vector, [-1, vector.shape.as_list()[-1]]) @@ -163,6 +165,11 @@ def mat2d_to_layer_params(vector_template, mat2d): if isinstance(vector_template, (tuple, list)): w_part, b_part = mat2d[:-1], mat2d[-1] return array_ops.reshape(w_part, vector_template[0].shape), b_part + elif isinstance(vector_template, ops.IndexedSlices): + if not isinstance(mat2d, ops.IndexedSlices): + raise TypeError( + "If vector_template is an IndexedSlices, so should mat2d.") + return mat2d else: return array_ops.reshape(mat2d, vector_template.shape) @@ -420,5 +427,57 @@ def batch_execute(global_step, thunks, batch_size, name=None): return result +def matmul_sparse_dense(A, B, name=None): # pylint: disable=invalid-name + """Computes matmul(A, B) where A is sparse, B is dense. + + Args: + A: tf.IndexedSlices with dense shape [m, n]. + B: tf.Tensor with shape [n, k]. + name: str. Name of op. 
+ + Returns: + tf.IndexedSlices resulting from matmul(A, B). + + Raises: + ValueError: If A doesn't represent a matrix. + ValueError: If B is not rank-2. + """ + with ops.name_scope(name, "matmul_sparse_dense", [A, B]): + if A.indices.shape.ndims != 1 or A.values.shape.ndims != 2: + raise ValueError("A must represent a matrix. Found: %s." % A) + if B.shape.ndims != 2: + raise ValueError("B must be a matrix.") + new_values = math_ops.matmul(A.values, B) + return ops.IndexedSlices( + new_values, + A.indices, + dense_shape=array_ops.stack([A.dense_shape[0], new_values.shape[1]])) + + +def matmul_diag_sparse(A_diag, B, name=None): # pylint: disable=invalid-name + """Computes matmul(A, B) where A is a diagonal matrix, B is sparse. + + Args: + A_diag: diagonal entries of matrix A of shape [m, m]. + B: tf.IndexedSlices. Represents matrix of shape [m, n]. + name: str. Name of op. + + Returns: + tf.IndexedSlices resulting from matmul(A, B). + + Raises: + ValueError: If A_diag is not rank-1. + ValueError: If B doesn't represent a matrix. + """ + with ops.name_scope(name, "matmul_diag_sparse", [A_diag, B]): + A_diag = ops.convert_to_tensor(A_diag) + if A_diag.shape.ndims != 1: + raise ValueError("A_diag must be a rank-1 Tensor.") + if B.indices.shape.ndims != 1 or B.values.shape.ndims != 2: + raise ValueError("B must represent a matrix. Found: %s." % B) + a = array_ops.gather(A_diag, B.indices) + a = array_ops.reshape(a, list(a.shape) + [1] * (B.values.shape.ndims - 1)) + return ops.IndexedSlices(a * B.values, B.indices, dense_shape=B.dense_shape) + # TODO(b/69623235): Add a function for finding tensors that share gradients # to eliminate redundant fisher factor computations. 
diff --git a/tensorflow/contrib/kfac/python/ops/utils_lib.py b/tensorflow/contrib/kfac/python/ops/utils_lib.py index fe8e39c212..8e424a7946 100644 --- a/tensorflow/contrib/kfac/python/ops/utils_lib.py +++ b/tensorflow/contrib/kfac/python/ops/utils_lib.py @@ -40,6 +40,8 @@ _allowed_symbols = [ "fwd_gradients", "ensure_sequence", "batch_execute", + "matmul_sparse_dense", + "matmul_diag_sparse", ] remove_undocumented(__name__, allowed_exception_list=_allowed_symbols) -- GitLab From 71878e136473e4ea2d85593171fcf221d3bced2a Mon Sep 17 00:00:00 2001 From: Thomas Deegan Date: Thu, 15 Feb 2018 16:38:46 -0800 Subject: [PATCH 0084/2939] Update remove_control_dependencies.cc --- .../tools/graph_transforms/remove_control_dependencies.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc index ba6df633be..cba6b78fc5 100644 --- a/tensorflow/tools/graph_transforms/remove_control_dependencies.cc +++ b/tensorflow/tools/graph_transforms/remove_control_dependencies.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include -#include -#include +#include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/graph/node_builder.h" +#include "tensorflow/tools/graph_transforms/transform_utils.h" namespace tensorflow { namespace graph_transforms { -- GitLab From 3fa24cecf1a3486406a5c1d2af3452aba53f6686 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Thu, 15 Feb 2018 16:40:24 -0800 Subject: [PATCH 0085/2939] Adding Shape inference functions to infeed ops. 
PiperOrigin-RevId: 185923685 --- tensorflow/contrib/tpu/ops/infeed_ops.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/tpu/ops/infeed_ops.cc b/tensorflow/contrib/tpu/ops/infeed_ops.cc index 849c4a1102..efc546f9a6 100644 --- a/tensorflow/contrib/tpu/ops/infeed_ops.cc +++ b/tensorflow/contrib/tpu/ops/infeed_ops.cc @@ -41,6 +41,7 @@ REGISTER_OP("InfeedEnqueue") .Attr("dtype: type") .Attr("shape: shape = {}") .Attr("device_ordinal: int = -1") + .SetShapeFn(shape_inference::NoOutputs) .SetIsStateful() .Doc(R"doc( An op which feeds a single Tensor value into the computation. @@ -58,6 +59,7 @@ REGISTER_OP("InfeedEnqueueTuple") .Attr("dtypes: list(type)") .Attr("shapes: list(shape)") .Attr("device_ordinal: int = -1") + .SetShapeFn(shape_inference::NoOutputs) .SetIsStateful() .Doc(R"doc( An op which feeds multiple Tensor values into the computation as an XLA tuple. -- GitLab From af1cf84725cb776623fc42b275b965dfe452ce72 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 15 Feb 2018 17:02:12 -0800 Subject: [PATCH 0086/2939] Fixes broken test PiperOrigin-RevId: 185926797 --- .../kernel_tests/cache_dataset_op_test.py | 60 ++++++++++--------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py b/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py index b71652c980..02720a2e98 100644 --- a/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/cache_dataset_op_test.py @@ -28,6 +28,7 @@ from tensorflow.python.data.ops import iterator_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -202,44 +203,45 @@ class 
FilesystemCacheDatasetTest(test.TestCase): class MemoryCacheDatasetTest(test.TestCase): def testCacheDatasetPassthrough(self): - repeat_count = variables.Variable(constant_op.constant(10, dtypes.int64)) - dataset = dataset_ops.Dataset.range(3).flat_map( - lambda x: dataset_ops.Dataset.from_tensors(x).repeat(repeat_count)) + with ops.device("cpu:0"): + repeat_count = variables.Variable(constant_op.constant(10, dtypes.int64)) + dataset = dataset_ops.Dataset.range(3).flat_map( + lambda x: dataset_ops.Dataset.from_tensors(x).repeat(repeat_count)) - cached_dataset = dataset.cache().repeat(2) - uncached_dataset = dataset.repeat(2) + cached_dataset = dataset.cache().repeat(2) + uncached_dataset = dataset.repeat(2) - # Needs to be initializable to capture the variable. - cached_iterator = cached_dataset.make_initializable_iterator() - cached_next = cached_iterator.get_next() - uncached_iterator = uncached_dataset.make_initializable_iterator() - uncached_next = uncached_iterator.get_next() + # Needs to be initializable to capture the variable. + cached_iterator = cached_dataset.make_initializable_iterator() + cached_next = cached_iterator.get_next() + uncached_iterator = uncached_dataset.make_initializable_iterator() + uncached_next = uncached_iterator.get_next() - with self.test_session() as sess: + with self.test_session() as sess: - sess.run(repeat_count.initializer) - sess.run(cached_iterator.initializer) - sess.run(uncached_iterator.initializer) + sess.run(repeat_count.initializer) + sess.run(cached_iterator.initializer) + sess.run(uncached_iterator.initializer) - for i in range(3): - for _ in range(10): - self.assertEqual(sess.run(cached_next), i) - self.assertEqual(sess.run(uncached_next), i) + for i in range(3): + for _ in range(10): + self.assertEqual(sess.run(cached_next), i) + self.assertEqual(sess.run(uncached_next), i) - sess.run(repeat_count.assign(0)) + sess.run(repeat_count.assign(0)) - # The uncached iterator should now be empty. 
- with self.assertRaises(errors.OutOfRangeError): - sess.run(uncached_next) + # The uncached iterator should now be empty. + with self.assertRaises(errors.OutOfRangeError): + sess.run(uncached_next) - # The cached iterator replays from cache. - for i in range(3): - for _ in range(10): - self.assertEqual(sess.run(cached_next), i) + # The cached iterator replays from cache. + for i in range(3): + for _ in range(10): + self.assertEqual(sess.run(cached_next), i) - # The cached iterator should now be empty. - with self.assertRaises(errors.OutOfRangeError): - sess.run(cached_next) + # The cached iterator should now be empty. + with self.assertRaises(errors.OutOfRangeError): + sess.run(cached_next) def testEmptyCacheReading(self): components = (np.array([1, 2, 3, 4]), np.array([5, 6, 7, 8]), -- GitLab From f5c581f0f4649898ed00650fe98c7ef344e0f240 Mon Sep 17 00:00:00 2001 From: Reed Wanderman-Milne Date: Thu, 15 Feb 2018 17:05:41 -0800 Subject: [PATCH 0087/2939] Use np.frombuffer instead of np.fromstring to avoid DeprecationWarning. Resolves #17020 PiperOrigin-RevId: 185927310 --- tensorflow/python/framework/tensor_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 0e5f696111..cbba112841 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -557,7 +557,7 @@ def MakeNdarray(tensor): dtype = tensor_dtype.as_numpy_dtype if tensor.tensor_content: - return np.fromstring(tensor.tensor_content, dtype=dtype).reshape(shape) + return np.frombuffer(tensor.tensor_content, dtype=dtype).reshape(shape) elif tensor_dtype == dtypes.float16: # the half_val field of the TensorProto stores the binary representation # of the fp16: we need to reinterpret this as a proper float16 -- GitLab From 11f1e50886f91ce2caa6e53b0bc9a1e82abdda8e Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 15 Feb 2018 17:38:55 -0800 Subject: [PATCH 0088/2939] Keep the results below 2^31 in exp() test to avoid overflowing. PiperOrigin-RevId: 185931075 --- tensorflow/contrib/lite/testing/generate_examples.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 1ced3bfd73..944031da24 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -775,7 +775,8 @@ def make_exp_tests(zip_path): def build_inputs(parameters, sess, inputs, outputs): values = [ - create_tensor_data(parameters["input_dtype"], parameters["input_shape"]) + create_tensor_data(parameters["input_dtype"], parameters["input_shape"], + min_value=-100, max_value=9) ] return values, sess.run(outputs, feed_dict=dict(zip(inputs, values))) -- GitLab From 33071159b30278d9e5a1802480c03e5029fa4c93 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 17:44:59 -0800 Subject: [PATCH 0089/2939] Address timeout of conv_ops_test. PiperOrigin-RevId: 185931585 --- tensorflow/core/kernels/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 523e395699..cee3c55d1a 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1040,7 +1040,7 @@ tf_cc_test( tf_cc_test( name = "conv_ops_test", - size = "small", + size = "medium", srcs = ["conv_ops_test.cc"], deps = [ ":conv_ops", -- GitLab From b476a6eca15c9952293878728a2d0105e4223ac0 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 15 Feb 2018 18:21:11 -0800 Subject: [PATCH 0090/2939] Add stateful metrics support in tf.keras. 
PiperOrigin-RevId: 185935092 --- .../python/keras/_impl/keras/__init__.py | 2 +- .../python/keras/_impl/keras/callbacks.py | 38 ++++- .../keras/_impl/keras/engine/training.py | 140 +++++++++++------- .../python/keras/_impl/keras/metrics.py | 17 ++- .../python/keras/_impl/keras/metrics_test.py | 71 +++++++++ .../api/golden/tensorflow.keras.-model.pbtxt | 2 +- ...sorflow.keras.callbacks.-base-logger.pbtxt | 2 +- ...flow.keras.callbacks.-progbar-logger.pbtxt | 2 +- .../api/golden/tensorflow.keras.metrics.pbtxt | 2 +- .../tensorflow.keras.models.-model.pbtxt | 2 +- 10 files changed, 208 insertions(+), 70 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/__init__.py b/tensorflow/python/keras/_impl/keras/__init__.py index 7311353932..b63907b2e6 100644 --- a/tensorflow/python/keras/_impl/keras/__init__.py +++ b/tensorflow/python/keras/_impl/keras/__init__.py @@ -40,4 +40,4 @@ from tensorflow.python.keras._impl.keras.layers import Input from tensorflow.python.keras._impl.keras.models import Model from tensorflow.python.keras._impl.keras.models import Sequential -__version__ = '2.1.3-tf' +__version__ = '2.1.4-tf' diff --git a/tensorflow/python/keras/_impl/keras/callbacks.py b/tensorflow/python/keras/_impl/keras/callbacks.py index de013c7c3f..f6c4661425 100644 --- a/tensorflow/python/keras/_impl/keras/callbacks.py +++ b/tensorflow/python/keras/_impl/keras/callbacks.py @@ -164,7 +164,7 @@ class CallbackList(object): class Callback(object): """Abstract base class used to build new callbacks. - # Properties + Attributes: params: dict. Training parameters (eg. verbosity, batch size, number of epochs...). model: instance of `keras.models.Model`. @@ -222,8 +222,18 @@ class BaseLogger(Callback): """Callback that accumulates epoch averages of metrics. This callback is automatically applied to every Keras model. + + Arguments: + stateful_metrics: Iterable of string names of metrics that + should *not* be averaged over an epoch. 
+ Metrics in this list will be logged as-is in `on_epoch_end`. + All others will be averaged in `on_epoch_end`. """ + def __init__(self, stateful_metrics=None): + super(BaseLogger, self).__init__() + self.stateful_metrics = set(stateful_metrics or []) + def on_epoch_begin(self, epoch, logs=None): self.seen = 0 self.totals = {} @@ -234,17 +244,23 @@ class BaseLogger(Callback): self.seen += batch_size for k, v in logs.items(): - if k in self.totals: - self.totals[k] += v * batch_size + if k in self.stateful_metrics: + self.totals[k] = v else: - self.totals[k] = v * batch_size + if k in self.totals: + self.totals[k] += v * batch_size + else: + self.totals[k] = v * batch_size def on_epoch_end(self, epoch, logs=None): if logs is not None: for k in self.params['metrics']: if k in self.totals: # Make value available to next callbacks. - logs[k] = self.totals[k] / self.seen + if k in self.stateful_metrics: + logs[k] = self.totals[k] + else: + logs[k] = self.totals[k] / self.seen @tf_export('keras.callbacks.TerminateOnNaN') @@ -272,12 +288,16 @@ class ProgbarLogger(Callback): count_mode: One of "steps" or "samples". Whether the progress bar should count samples seen or steps (batches) seen. + stateful_metrics: Iterable of string names of metrics that + should *not* be averaged over an epoch. + Metrics in this list will be logged as-is. + All others will be averaged over time (e.g. loss, etc). Raises: ValueError: In case of invalid `count_mode`. 
""" - def __init__(self, count_mode='samples'): + def __init__(self, count_mode='samples', stateful_metrics=None): super(ProgbarLogger, self).__init__() if count_mode == 'samples': self.use_steps = False @@ -285,6 +305,7 @@ class ProgbarLogger(Callback): self.use_steps = True else: raise ValueError('Unknown `count_mode`: ' + str(count_mode)) + self.stateful_metrics = set(stateful_metrics or []) def on_train_begin(self, logs=None): self.verbose = self.params['verbose'] @@ -298,7 +319,10 @@ class ProgbarLogger(Callback): else: target = self.params['samples'] self.target = target - self.progbar = Progbar(target=self.target, verbose=self.verbose) + self.progbar = Progbar( + target=self.target, + verbose=self.verbose, + stateful_metrics=self.stateful_metrics) self.seen = 0 def on_batch_begin(self, batch, logs=None): diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index a71f371b8e..fd14bf3d05 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -31,6 +31,7 @@ from tensorflow.python.keras._impl.keras import losses from tensorflow.python.keras._impl.keras import metrics as metrics_module from tensorflow.python.keras._impl.keras import optimizers from tensorflow.python.keras._impl.keras.engine import training_eager +from tensorflow.python.keras._impl.keras.engine.topology import Layer from tensorflow.python.keras._impl.keras.engine.topology import Network from tensorflow.python.keras._impl.keras.utils.data_utils import GeneratorEnqueuer from tensorflow.python.keras._impl.keras.utils.data_utils import OrderedEnqueuer @@ -274,7 +275,7 @@ def _check_loss_and_target_compatibility(targets, loss_fns, output_shapes): losses.categorical_crossentropy } for y, loss, shape in zip(targets, loss_fns, output_shapes): - if loss is None: + if y is None or loss is None: continue if loss is losses.categorical_crossentropy: if y.shape[-1] 
== 1: @@ -487,7 +488,7 @@ def _standardize_weights(y, raise ValueError('`class_weight` not supported for ' '3+ dimensional targets.') if y.shape[1] > 1: - y_classes = y.argmax(axis=1) + y_classes = np.argmax(y, axis=1) elif y.shape[1] == 1: y_classes = np.reshape(y, y.shape[0]) else: @@ -519,7 +520,7 @@ class Model(Network): def compile(self, optimizer, - loss, + loss=None, metrics=None, loss_weights=None, sample_weight_mode=None, @@ -581,7 +582,7 @@ class Model(Network): self.optimizer = optimizers.get(optimizer) self.loss = loss - self.metrics = metrics + self.metrics = metrics or [] self.loss_weights = loss_weights if context.in_eager_mode() and sample_weight_mode is not None: raise ValueError('sample_weight_mode is not supported in Eager mode.') @@ -817,7 +818,6 @@ class Model(Network): self._feed_sample_weight_modes.append(self.sample_weight_modes[i]) # Prepare metrics. - self.metrics = metrics self.weighted_metrics = weighted_metrics self.metrics_names = ['loss'] self.metrics_tensors = [] @@ -860,14 +860,8 @@ class Model(Network): nested_metrics = _collect_metrics(metrics, self.output_names) nested_weighted_metrics = _collect_metrics(weighted_metrics, self.output_names) - - def append_metric(layer_index, metric_name, metric_tensor): - """Helper function used in loop below.""" - if len(self.output_names) > 1: - metric_name = self.output_names[layer_index] + '_' + metric_name - self.metrics_names.append(metric_name) - self.metrics_tensors.append(metric_tensor) - + self.metrics_updates = [] + self.stateful_metric_names = [] with K.name_scope('metrics'): for i in range(len(self.outputs)): if i in skip_target_indices: @@ -886,42 +880,65 @@ class Model(Network): if metric in ('accuracy', 'acc', 'crossentropy', 'ce'): # custom handling of accuracy/crossentropy # (because of class mode duality) - output_shape = K.int_shape(self.outputs[i]) + output_shape = self.outputs[i].get_shape().as_list() if (output_shape[-1] == 1 or self.loss_functions[i] == 
losses.binary_crossentropy): # case: binary accuracy/crossentropy if metric in ('accuracy', 'acc'): - acc_fn = metrics_module.binary_accuracy + metric_fn = metrics_module.binary_accuracy elif metric in ('crossentropy', 'ce'): - acc_fn = metrics_module.binary_crossentropy + metric_fn = metrics_module.binary_crossentropy elif self.loss_functions[ i] == losses.sparse_categorical_crossentropy: # case: categorical accuracy/crossentropy with sparse targets if metric in ('accuracy', 'acc'): - acc_fn = metrics_module.sparse_categorical_accuracy + metric_fn = metrics_module.sparse_categorical_accuracy elif metric in ('crossentropy', 'ce'): - acc_fn = metrics_module.sparse_categorical_crossentropy + metric_fn = metrics_module.sparse_categorical_crossentropy else: # case: categorical accuracy/crossentropy if metric in ('accuracy', 'acc'): - acc_fn = metrics_module.categorical_accuracy + metric_fn = metrics_module.categorical_accuracy elif metric in ('crossentropy', 'ce'): - acc_fn = metrics_module.categorical_crossentropy + metric_fn = metrics_module.categorical_crossentropy if metric in ('accuracy', 'acc'): suffix = 'acc' elif metric in ('crossentropy', 'ce'): suffix = 'ce' - weighted_metric_fn = _weighted_masked_objective(acc_fn) + weighted_metric_fn = _weighted_masked_objective(metric_fn) metric_name = metric_name_prefix + suffix else: metric_fn = metrics_module.get(metric) weighted_metric_fn = _weighted_masked_objective(metric_fn) - metric_name = metric_name_prefix + metric_fn.__name__ + # Get metric name as string + if hasattr(metric_fn, 'name'): + metric_name = metric_fn.name + else: + metric_name = metric_fn.__name__ + metric_name = metric_name_prefix + metric_name with K.name_scope(metric_name): metric_result = weighted_metric_fn( y_true, y_pred, weights=weights, mask=masks[i]) - append_metric(i, metric_name, metric_result) + + # Append to self.metrics_names, self.metrics_tensors, + # self.stateful_metric_names + if len(self.output_names) > 1: + metric_name = '%s_%s' %
(self.output_names[i], metric_name) + # Dedupe name + j = 1 + base_metric_name = metric_name + while metric_name in self.metrics_names: + metric_name = '%s_%d' % (base_metric_name, j) + j += 1 + self.metrics_names.append(metric_name) + self.metrics_tensors.append(metric_result) + + # Keep track of state updates created by + # stateful metrics (i.e. metrics layers). + if isinstance(metric_fn, Layer): + self.stateful_metric_names.append(metric_name) + self.metrics_updates += metric_fn.updates handle_metrics(output_metrics) handle_metrics(output_weighted_metrics, weights=weights) @@ -986,6 +1003,8 @@ class Model(Network): updates += self.get_updates_for(None) # Conditional updates relevant to this model updates += self.get_updates_for(self._feed_inputs) + # Stateful metrics updates + updates += self.metrics_updates # Gets loss and metrics. Updates weights at each call. self.train_function = K.function( inputs, [self.total_loss] + self.metrics_tensors, @@ -1006,7 +1025,7 @@ class Model(Network): # Does update the network states. 
self.test_function = K.function( inputs, [self.total_loss] + self.metrics_tensors, - updates=self.state_updates, + updates=self.state_updates + self.metrics_updates, name='test_function', **self._function_kwargs) @@ -1145,14 +1164,18 @@ class Model(Network): index_array = np.arange(num_train_samples) self.history = cbks.History() - callbacks = [cbks.BaseLogger()] + (callbacks or []) + [self.history] + all_callbacks = [cbks.BaseLogger( + stateful_metrics=self.stateful_metric_names)] if verbose: if steps_per_epoch is not None: count_mode = 'steps' else: count_mode = 'samples' - callbacks += [cbks.ProgbarLogger(count_mode)] - callbacks = cbks.CallbackList(callbacks) + all_callbacks.append( + cbks.ProgbarLogger( + count_mode, stateful_metrics=self.stateful_metric_names)) + all_callbacks += (callbacks or []) + [self.history] + callbacks = cbks.CallbackList(all_callbacks) out_labels = out_labels or [] # it's possible to callback a different model than self @@ -1186,6 +1209,11 @@ class Model(Network): indices_for_conversion_to_dense.append(i) for epoch in range(initial_epoch, epochs): + # Reset stateful metrics + for m in self.metrics: + if isinstance(m, Layer): + m.reset_states() + # Update callbacks callbacks.on_epoch_begin(epoch) epoch_logs = {} if steps_per_epoch is not None: @@ -1286,12 +1314,19 @@ class Model(Network): or list of arrays of predictions (if the model has multiple outputs). 
""" + if hasattr(self, 'metrics'): + for m in self.metrics: + if isinstance(m, Layer): + m.reset_states() + num_samples = self._check_num_samples(ins, batch_size, steps, 'steps') if verbose == 1: if steps is not None: - progbar = Progbar(target=steps) + progbar = Progbar(target=steps, + stateful_metrics=self.stateful_metric_names) else: - progbar = Progbar(target=num_samples) + progbar = Progbar(target=num_samples, + stateful_metrics=self.stateful_metric_names) indices_for_conversion_to_dense = [] for i in range(len(self._feed_inputs)): @@ -1373,6 +1408,17 @@ class Model(Network): and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs. """ + if hasattr(self, 'metrics'): + for m in self.metrics: + if isinstance(m, Layer): + m.reset_states() + stateful_metric_indices = [ + i for i, name in enumerate(self.metrics_names) + if str(name) in self.stateful_metric_names + ] + else: + stateful_metric_indices = [] + num_samples = self._check_num_samples(ins, batch_size, steps, 'steps') outs = [] if verbose == 1: @@ -1396,7 +1442,10 @@ class Model(Network): for _ in enumerate(batch_outs): outs.append(0.) for i, batch_out in enumerate(batch_outs): - outs[i] += batch_out + if i in stateful_metric_indices: + outs[i] = batch_out + else: + outs[i] += batch_out else: if step == 0: outs.append(0.) @@ -1404,7 +1453,8 @@ class Model(Network): if verbose == 1: progbar.update(step + 1) for i in range(len(outs)): - outs[i] /= steps + if i not in stateful_metric_indices: + outs[i] /= steps else: batches = make_batches(num_samples, batch_size) index_array = np.arange(num_samples) @@ -1425,7 +1475,10 @@ class Model(Network): for batch_out in enumerate(batch_outs): outs.append(0.) for i, batch_out in enumerate(batch_outs): - outs[i] += batch_out * len(batch_ids) + if i in stateful_metric_indices: + outs[i] = batch_out + else: + outs[i] += batch_out * len(batch_ids) else: if batch_index == 0: outs.append(0.) 
@@ -1433,7 +1486,8 @@ class Model(Network): if verbose == 1: progbar.update(batch_end) for i in range(len(outs)): - outs[i] /= num_samples + if i not in stateful_metric_indices: + outs[i] /= num_samples if len(outs) == 1: return outs[0] return outs @@ -1655,20 +1709,6 @@ class Model(Network): str(x[0].shape[0]) + ' samples') return x, y, sample_weights - def _get_deduped_metrics_names(self): - out_labels = self.metrics_names - - # Rename duplicated metrics name - # (can happen with an output layer shared among multiple dataflows). - deduped_out_labels = [] - for i, label in enumerate(out_labels): - new_label = label - if out_labels.count(label) > 1: - dup_idx = out_labels[:i].count(label) - new_label += '_' + str(dup_idx + 1) - deduped_out_labels.append(new_label) - return deduped_out_labels - def _set_inputs(self, inputs): """Set model's input and output specs based on the input data received. @@ -1992,7 +2032,7 @@ class Model(Network): ins = x + y + sample_weights # Prepare display labels. - out_labels = self._get_deduped_metrics_names() + out_labels = self.metrics_names if context.in_eager_mode(): if do_validation: @@ -2471,8 +2511,8 @@ class Model(Network): ' the `keras.utils.Sequence` class.') # Prepare display labels. 
- out_labels = self._get_deduped_metrics_names() - callback_metrics = out_labels + ['val_' + n for n in out_labels] + out_labels = self.metrics_names + callback_metrics = out_labels + ['val_%s' % n for n in out_labels] # prepare callbacks self.history = cbks.History() diff --git a/tensorflow/python/keras/_impl/keras/metrics.py b/tensorflow/python/keras/_impl/keras/metrics.py index 0e2fb6365a..82778a3dc4 100644 --- a/tensorflow/python/keras/_impl/keras/metrics.py +++ b/tensorflow/python/keras/_impl/keras/metrics.py @@ -36,6 +36,7 @@ from tensorflow.python.keras._impl.keras.losses import poisson from tensorflow.python.keras._impl.keras.losses import sparse_categorical_crossentropy from tensorflow.python.keras._impl.keras.losses import squared_hinge from tensorflow.python.keras._impl.keras.utils.generic_utils import deserialize_keras_object +from tensorflow.python.keras._impl.keras.utils.generic_utils import serialize_keras_object from tensorflow.python.util.tf_export import tf_export @@ -79,13 +80,13 @@ cosine = cosine_proximity @tf_export('keras.metrics.serialize') def serialize(metric): - return metric.__name__ + return serialize_keras_object(metric) @tf_export('keras.metrics.deserialize') -def deserialize(name, custom_objects=None): +def deserialize(config, custom_objects=None): return deserialize_keras_object( - name, + config, module_objects=globals(), custom_objects=custom_objects, printable_module_name='metric function') @@ -93,11 +94,13 @@ def deserialize(name, custom_objects=None): @tf_export('keras.metrics.get') def get(identifier): - if isinstance(identifier, six.string_types): - identifier = str(identifier) - return deserialize(identifier) + if isinstance(identifier, dict): + config = {'class_name': str(identifier), 'config': {}} + return deserialize(config) + elif isinstance(identifier, six.string_types): + return deserialize(str(identifier)) elif callable(identifier): return identifier else: raise ValueError('Could not interpret ' - 'metric function 
identifier:', identifier) + 'metric function identifier: %s' % identifier) diff --git a/tensorflow/python/keras/_impl/keras/metrics_test.py b/tensorflow/python/keras/_impl/keras/metrics_test.py index f4792f3543..44289ea02a 100644 --- a/tensorflow/python/keras/_impl/keras/metrics_test.py +++ b/tensorflow/python/keras/_impl/keras/metrics_test.py @@ -72,6 +72,77 @@ class KerasMetricsTest(test.TestCase): keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k=1)) self.assertEqual(result, 0.) + def test_stateful_metrics(self): + np.random.seed(1334) + + class BinaryTruePositives(keras.layers.Layer): + """Stateful Metric to count the total true positives over all batches. + + Assumes predictions and targets of shape `(samples, 1)`. + + Arguments: + threshold: Float, lower limit on prediction value that counts as a + positive class prediction. + name: String, name for the metric. + """ + + def __init__(self, name='true_positives', **kwargs): + super(BinaryTruePositives, self).__init__(name=name, **kwargs) + self.true_positives = keras.backend.variable(value=0, dtype='int32') + + def reset_states(self): + keras.backend.set_value(self.true_positives, 0) + + def __call__(self, y_true, y_pred): + """Computes the number of true positives in a batch. + + Args: + y_true: Tensor, batch_wise labels + y_pred: Tensor, batch_wise predictions + + Returns: + The total number of true positives seen this epoch at the + completion of the batch. 
+ """ + y_true = keras.backend.cast(y_true, 'int32') + y_pred = keras.backend.cast(keras.backend.round(y_pred), 'int32') + correct_preds = keras.backend.cast( + keras.backend.equal(y_pred, y_true), 'int32') + true_pos = keras.backend.cast( + keras.backend.sum(correct_preds * y_true), 'int32') + current_true_pos = self.true_positives * 1 + self.add_update(keras.backend.update_add(self.true_positives, + true_pos), + inputs=[y_true, y_pred]) + return current_true_pos + true_pos + + metric_fn = BinaryTruePositives() + config = keras.metrics.serialize(metric_fn) + metric_fn = keras.metrics.deserialize( + config, custom_objects={'BinaryTruePositives': BinaryTruePositives}) + + # Test on simple model + inputs = keras.Input(shape=(2,)) + outputs = keras.layers.Dense(1, activation='sigmoid')(inputs) + model = keras.Model(inputs, outputs) + model.compile(optimizer='sgd', + loss='binary_crossentropy', + metrics=['acc', metric_fn]) + + # Test fit, evaluate + samples = 1000 + x = np.random.random((samples, 2)) + y = np.random.randint(2, size=(samples, 1)) + model.fit(x, y, epochs=1, batch_size=10) + outs = model.evaluate(x, y, batch_size=10) + preds = model.predict(x) + + def ref_true_pos(y_true, y_pred): + return np.sum(np.logical_and(y_pred > 0.5, y_true == 1)) + + # Test correctness (e.g. 
updates should have been run) + self.assertAllClose(outs[2], ref_true_pos(y, preds), atol=1e-5) + if __name__ == '__main__': test.main() diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt index 76cf84084f..a13bfe0a92 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt @@ -144,7 +144,7 @@ tf_class { } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'sample_weight_mode\', \'weighted_metrics\', \'target_tensors\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'sample_weight_mode\', \'weighted_metrics\', \'target_tensors\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "compute_mask" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-base-logger.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-base-logger.pbtxt index ea4d514354..454823fd23 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-base-logger.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-base-logger.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'self\', \'stateful_metrics\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "on_batch_begin" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-progbar-logger.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-progbar-logger.pbtxt index 0e6901f28a..543de0ad48 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-progbar-logger.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.callbacks.-progbar-logger.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "" member_method { name: "__init__" - argspec: "args=[\'self\', \'count_mode\'], varargs=None, keywords=None, defaults=[\'samples\'], " + argspec: "args=[\'self\', \'count_mode\', \'stateful_metrics\'], varargs=None, keywords=None, defaults=[\'samples\', \'None\'], " } member_method { name: "on_batch_begin" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt index de285c1aab..42729e4237 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.metrics.pbtxt @@ -22,7 +22,7 @@ tf_module { } member_method { name: "deserialize" - argspec: "args=[\'name\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " + argspec: "args=[\'config\', \'custom_objects\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "get" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt index d8d4eb5ca7..f85b328e34 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt @@ -144,7 +144,7 @@ tf_class { } member_method { name: "compile" - argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'sample_weight_mode\', \'weighted_metrics\', \'target_tensors\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " + argspec: "args=[\'self\', \'optimizer\', \'loss\', \'metrics\', \'loss_weights\', \'sample_weight_mode\', \'weighted_metrics\', \'target_tensors\'], varargs=None, keywords=kwargs, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } member_method { name: "compute_mask" -- GitLab From 
5315ff2613acaa288ab818d082a95f37f5f05bb4 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 15 Feb 2018 18:22:21 -0800 Subject: [PATCH 0091/2939] Bug fix and typo fixes. PiperOrigin-RevId: 185935199 --- tensorflow/python/keras/BUILD | 2 +- .../python/keras/_impl/keras/backend.py | 65 ++-- .../keras/_impl/keras/engine/topology.py | 23 +- .../keras/_impl/keras/engine/topology_test.py | 21 + .../keras/_impl/keras/layers/convolutional.py | 8 +- .../_impl/keras/layers/convolutional_test.py | 363 +++++++++--------- .../python/keras/_impl/keras/testing_utils.py | 30 +- tensorflow/python/layers/convolutional.py | 4 +- tensorflow/python/layers/network.py | 2 +- 9 files changed, 283 insertions(+), 235 deletions(-) diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index d97a035256..1956478f39 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -395,7 +395,7 @@ py_test( py_test( name = "convolutional_test", - size = "medium", + size = "large", srcs = ["_impl/keras/layers/convolutional_test.py"], srcs_version = "PY2AND3", tags = [ diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index afa183b0a0..1fa264660d 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -258,7 +258,7 @@ def set_image_data_format(data_format): """ global _IMAGE_DATA_FORMAT if data_format not in {'channels_last', 'channels_first'}: - raise ValueError('Unknown data_format:', data_format) + raise ValueError('Unknown data_format: ' + str(data_format)) _IMAGE_DATA_FORMAT = str(data_format) @@ -342,9 +342,6 @@ def learning_phase(): Returns: Learning phase (scalar integer tensor or Python integer). - - Raises: - ValueError: If called when Eager execution is enabled. 
""" if context.in_eager_mode(): if 'eager' not in _GRAPH_LEARNING_PHASES: @@ -489,7 +486,7 @@ def _get_available_gpus(): def _has_nchw_support(): """Check whether the current scope supports NCHW ops. - Tensorflow does not support NCHW on CPU. Therefore we check if we are not + TensorFlow does not support NCHW on CPU. Therefore we check if we are not explicitly put on CPU, and have GPUs available. In this case there will be soft-placing on the GPU device. @@ -2233,7 +2230,7 @@ def resize_images(x, height_factor, width_factor, data_format): if original_shape[2] is not None else None, None)) return x else: - raise ValueError('Invalid data_format:', data_format) + raise ValueError('Invalid data_format: ' + str(data_format)) @tf_export('keras.backend.resize_volumes') @@ -2265,7 +2262,7 @@ def resize_volumes(x, depth_factor, height_factor, width_factor, data_format): output = repeat_elements(output, width_factor, axis=3) return output else: - raise ValueError('Invalid data_format:', data_format) + raise ValueError('Invalid data_format: ' + str(data_format)) @tf_export('keras.backend.repeat_elements') @@ -2347,7 +2344,7 @@ def arange(start, stop=None, step=1, dtype='int32'): The function arguments use the same convention as Theano's arange: if only one argument is provided, - it is in fact the "stop" argument. + it is in fact the "stop" argument and "start" is 0. The default type of the returned tensor is `'int32'` to match TensorFlow's default. @@ -2362,7 +2359,7 @@ def arange(start, stop=None, step=1, dtype='int32'): An integer tensor. """ - # Match the behavior of numpy and Theano by returning an empty seqence. + # Match the behavior of numpy and Theano by returning an empty sequence. 
if stop is None and start < 0: start = 0 result = math_ops.range(start, limit=stop, delta=step, name='arange') @@ -2483,7 +2480,7 @@ def spatial_2d_padding(x, padding=((1, 1), (1, 1)), data_format=None): if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) if data_format == 'channels_first': pattern = [[0, 0], [0, 0], list(padding[0]), list(padding[1])] @@ -2524,7 +2521,7 @@ def spatial_3d_padding(x, padding=((1, 1), (1, 1), (1, 1)), data_format=None): if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) if data_format == 'channels_first': pattern = [[0, 0], [0, 0], [padding[0][0], padding[0][1]], @@ -2797,7 +2794,7 @@ def function(inputs, outputs, updates=None, **kwargs): for key in kwargs: if (key not in tf_inspect.getargspec(session_module.Session.run)[0] and key not in tf_inspect.getargspec(Function.__init__)[0]): - msg = ('Invalid argument "%s" passed to K.function with Tensorflow ' + msg = ('Invalid argument "%s" passed to K.function with TensorFlow ' 'backend') % key raise ValueError(msg) return Function(inputs, outputs, updates=updates, **kwargs) @@ -2916,7 +2913,7 @@ def rnn(step_function, if unroll: if not inputs.get_shape()[0]: - raise ValueError('Unrolling requires a ' 'fixed number of timesteps.') + raise ValueError('Unrolling requires a fixed number of timesteps.') states = initial_states successive_states = [] successive_outputs = [] @@ -3553,7 +3550,7 @@ def _preprocess_conv3d_input(x, data_format): def _preprocess_padding(padding): - """Convert keras' padding to tensorflow's padding. + """Convert keras' padding to TensorFlow's padding. 
Arguments: padding: string, one of 'same' , 'valid' @@ -3569,7 +3566,7 @@ def _preprocess_padding(padding): elif padding == 'valid': padding = 'VALID' else: - raise ValueError('Invalid padding:', padding) + raise ValueError('Invalid padding: ' + str(padding)) return padding @@ -3600,7 +3597,7 @@ def conv1d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) kernel_shape = kernel.get_shape().as_list() if padding == 'causal': @@ -3652,7 +3649,7 @@ def conv2d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) x, tf_data_format = _preprocess_conv2d_input(x, data_format) padding = _preprocess_padding(padding) @@ -3699,7 +3696,7 @@ def conv2d_transpose(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) if isinstance(output_shape, (tuple, list)): output_shape = array_ops.stack(output_shape) @@ -3758,16 +3755,18 @@ def separable_conv1d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) x, tf_data_format = _preprocess_conv1d_input(x, data_format) padding = _preprocess_padding(padding) + if not isinstance(strides, tuple): + strides = tuple(strides) if tf_data_format == 'NHWC': spatial_start_dim = 1 - strides = (1, 1) + strides + (1,) + strides = (1,) + strides * 2 + (1,) else: spatial_start_dim = 2 - strides = (1, 
1, 1) + strides + strides = (1, 1) + strides * 2 x = array_ops.expand_dims(x, spatial_start_dim) depthwise_kernel = array_ops.expand_dims(depthwise_kernel, 0) pointwise_kernel = array_ops.expand_dims(pointwise_kernel, 0) @@ -3820,10 +3819,12 @@ def separable_conv2d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) x, tf_data_format = _preprocess_conv2d_input(x, data_format) padding = _preprocess_padding(padding) + if not isinstance(strides, tuple): + strides = tuple(strides) if tf_data_format == 'NHWC': strides = (1,) + strides + (1,) else: @@ -3869,7 +3870,7 @@ def depthwise_conv2d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) x, tf_data_format = _preprocess_conv2d_input(x, data_format) padding = _preprocess_padding(padding) @@ -3919,7 +3920,7 @@ def conv3d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) x, tf_data_format = _preprocess_conv3d_input(x, data_format) padding = _preprocess_padding(padding) @@ -3965,7 +3966,7 @@ def conv3d_transpose(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) if isinstance(output_shape, (tuple, list)): output_shape = array_ops.stack(output_shape) @@ -4024,7 +4025,7 @@ def pool2d(x, if data_format is None: data_format = image_data_format() if data_format not 
in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) x, tf_data_format = _preprocess_conv2d_input(x, data_format) padding = _preprocess_padding(padding) @@ -4042,7 +4043,7 @@ def pool2d(x, x = nn.avg_pool( x, pool_size, strides, padding=padding, data_format=tf_data_format) else: - raise ValueError('Invalid pooling mode:', pool_mode) + raise ValueError('Invalid pooling mode: ' + str(pool_mode)) if data_format == 'channels_first' and tf_data_format == 'NHWC': x = array_ops.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW @@ -4077,7 +4078,7 @@ def pool3d(x, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) x, tf_data_format = _preprocess_conv3d_input(x, data_format) padding = _preprocess_padding(padding) @@ -4095,7 +4096,7 @@ def pool3d(x, x = nn.avg_pool3d( x, pool_size, strides, padding=padding, data_format=tf_data_format) else: - raise ValueError('Invalid pooling mode:', pool_mode) + raise ValueError('Invalid pooling mode: ' + str(pool_mode)) if data_format == 'channels_first' and tf_data_format == 'NDHWC': x = array_ops.transpose(x, (0, 4, 1, 2, 3)) @@ -4126,7 +4127,7 @@ def local_conv1d(inputs, kernel, kernel_size, strides, data_format=None): if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) stride = strides[0] kernel_shape = int_shape(kernel) @@ -4182,7 +4183,7 @@ def local_conv2d(inputs, if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise 
ValueError('Unknown data_format: ' + str(data_format)) stride_row, stride_col = strides output_row, output_col = output_shape @@ -4235,7 +4236,7 @@ def bias_add(x, bias, data_format=None): if data_format is None: data_format = image_data_format() if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) + raise ValueError('Unknown data_format: ' + str(data_format)) bias_shape = int_shape(bias) if len(bias_shape) != 1 and len(bias_shape) != ndim(x) - 1: raise ValueError( diff --git a/tensorflow/python/keras/_impl/keras/engine/topology.py b/tensorflow/python/keras/_impl/keras/engine/topology.py index b267fac7df..dd7436e3d0 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology.py @@ -1154,10 +1154,8 @@ class Network(tf_network.GraphNetwork, Layer): proceed = ask_to_proceed_with_overwrite(filepath) if not proceed: return - f = h5py.File(filepath, 'w') - save_weights_to_hdf5_group(f, self.layers) - f.flush() - f.close() + with h5py.File(filepath, 'w') as f: + save_weights_to_hdf5_group(f, self.layers) def load_weights(self, filepath, by_name=False): """Loads all layer weights from a HDF5 save file. 
@@ -1184,16 +1182,13 @@ class Network(tf_network.GraphNetwork, Layer): """ if h5py is None: raise ImportError('`load_weights` requires h5py.') - f = h5py.File(filepath, mode='r') - if 'layer_names' not in f.attrs and 'model_weights' in f: - f = f['model_weights'] - if by_name: - load_weights_from_hdf5_group_by_name(f, self.layers) - else: - load_weights_from_hdf5_group(f, self.layers) - - if hasattr(f, 'close'): - f.close() + with h5py.File(filepath, 'r') as f: + if 'layer_names' not in f.attrs and 'model_weights' in f: + f = f['model_weights'] + if by_name: + load_weights_from_hdf5_group_by_name(f, self.layers) + else: + load_weights_from_hdf5_group(f, self.layers) def _updated_config(self): """Util hared between different serialization methods. diff --git a/tensorflow/python/keras/_impl/keras/engine/topology_test.py b/tensorflow/python/keras/_impl/keras/engine/topology_test.py index 0673e42376..28ddc094ee 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology_test.py @@ -555,6 +555,27 @@ class TopologyConstructionTest(test.TestCase): model = keras.models.Model(a, b) self.assertEqual(model.output_mask.get_shape().as_list(), [None, 10]) + def test_activity_regularization_with_model_composition(self): + + def reg(x): + return keras.backend.sum(x) + + net_a_input = keras.Input((2,)) + net_a = net_a_input + net_a = keras.layers.Dense(2, kernel_initializer='ones', + use_bias=False, + activity_regularizer=reg)(net_a) + model_a = keras.Model([net_a_input], [net_a]) + + net_b_input = keras.Input((2,)) + net_b = model_a(net_b_input) + model_b = keras.Model([net_b_input], [net_b]) + + model_b.compile(optimizer='sgd', loss=None) + x = np.ones((1, 2)) + loss = model_b.evaluate(x) + self.assertEqual(loss, 4.) 
+ def test_weight_preprocessing(self): input_dim = 3 output_dim = 3 diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional.py b/tensorflow/python/keras/_impl/keras/layers/convolutional.py index bc43451114..162ae6c28f 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional.py @@ -60,7 +60,7 @@ class Conv1D(tf_convolutional_layers.Conv1D, Layer): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of a single integer, specifying the length of the 1D convolution window. strides: An integer or tuple/list of a single integer, @@ -173,7 +173,7 @@ class Conv2D(tf_convolutional_layers.Conv2D, Layer): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of 2 integers, specifying the width and height of the 2D convolution window. Can be a single integer to specify the same value for @@ -308,7 +308,7 @@ class Conv3D(tf_convolutional_layers.Conv3D, Layer): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). kernel_size: An integer or tuple/list of 3 integers, specifying the depth, height and width of the 3D convolution window. Can be a single integer to specify the same value for @@ -877,7 +877,7 @@ class SeparableConv2D(tf_convolutional_layers.SeparableConv2D, Layer): Arguments: filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). + (i.e. the number of output filters in the convolution). 
kernel_size: An integer or tuple/list of 2 integers, specifying the width and height of the 2D convolution window. Can be a single integer to specify the same value for diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py b/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py index 39c9d4f0fb..4a6228121b 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import copy + import numpy as np from tensorflow.python.keras._impl import keras @@ -27,45 +29,39 @@ from tensorflow.python.platform import test class Convolution1DTest(test.TestCase): - def test_dilated_conv1d(self): - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.Conv1D, - input_data=np.reshape(np.arange(4, dtype='float32'), (1, 4, 1)), - kwargs={ - 'filters': 1, - 'kernel_size': 2, - 'dilation_rate': 1, - 'padding': 'valid', - 'kernel_initializer': 'ones', - 'use_bias': False, - }, - expected_output=[[[1], [3], [5]]]) - - def test_conv_1d(self): - batch_size = 2 - steps = 8 - input_dim = 2 - kernel_size = 3 - filters = 3 + def _run_test(self, kwargs, arg, values): + num_samples = 2 + stack_size = 3 + length = 7 - for padding in ['valid', 'same']: - for strides in [1, 2]: - if padding == 'same' and strides != 1: - continue + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.Conv1D, + kwargs=test_kwargs, + input_shape=(num_samples, length, stack_size)) - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.Conv1D, - kwargs={ - 'filters': filters, - 'kernel_size': kernel_size, - 'padding': padding, - 'strides': strides - }, - input_shape=(batch_size, steps, input_dim)) - - def 
test_conv_1d_regularizers(self): + def test_conv1d(self): + kwargs = { + 'filters': 2, + 'kernel_size': 3, + } + + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [2]) + self._run_test(kwargs, 'dilation_rate', [2]) + + kwargs = { + 'filters': 2, + 'kernel_size': 3, + 'padding': 'same', + } + self._run_test(kwargs, 'dilation_rate', [2]) + self._run_test(kwargs, 'dilation_rate', [3]) + + def test_conv1d_regularizers(self): kwargs = { 'filters': 3, 'kernel_size': 3, @@ -82,7 +78,7 @@ class Convolution1DTest(test.TestCase): layer(keras.backend.variable(np.ones((1, 5, 2)))) self.assertEqual(len(layer.losses), 3) - def test_conv_1d_constraints(self): + def test_conv1d_constraints(self): k_constraint = lambda x: x b_constraint = lambda x: x @@ -103,35 +99,43 @@ class Convolution1DTest(test.TestCase): class Conv2DTest(test.TestCase): - def test_convolution_2d(self): + def _run_test(self, kwargs, arg, values): num_samples = 2 - filters = 2 stack_size = 3 - kernel_size = (3, 2) num_row = 7 num_col = 6 - for padding in ['valid', 'same']: - for strides in [(1, 1), (2, 2)]: - if padding == 'same' and strides != (1, 1): - continue + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.SeparableConv2D, + kwargs=test_kwargs, + input_shape=(num_samples, num_row, num_col, stack_size)) - with self.test_session(use_gpu=True): - # Only runs on GPU with CUDA, channels_first is not supported on CPU. - # TODO(b/62340061): Support channels_first on CPU. 
- if test.is_gpu_available(cuda_only=True): - testing_utils.layer_test( - keras.layers.Conv2D, - kwargs={ - 'filters': filters, - 'kernel_size': kernel_size, - 'padding': padding, - 'strides': strides, - 'data_format': 'channels_first' - }, - input_shape=(num_samples, stack_size, num_row, num_col)) - - def test_convolution_2d_regularizers(self): + def test_conv2d(self): + kwargs = { + 'filters': 2, + 'kernel_size': (3, 3), + } + + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [(2, 2)]) + if test.is_gpu_available(cuda_only=True): + # Only runs on GPU with CUDA, channels_first is not supported on CPU. + # TODO(b/62340061): Support channels_first on CPU. + self._run_test(kwargs, 'data_format', ['channels_first']) + self._run_test(kwargs, 'dilation_rate', [(2, 2)]) + + kwargs = { + 'filters': 2, + 'kernel_size': 3, + 'padding': 'same', + } + self._run_test(kwargs, 'dilation_rate', [2]) + + def test_conv2d_regularizers(self): kwargs = { 'filters': 3, 'kernel_size': 3, @@ -148,7 +152,7 @@ class Conv2DTest(test.TestCase): layer(keras.backend.variable(np.ones((1, 5, 5, 2)))) self.assertEqual(len(layer.losses), 3) - def test_convolution_2d_constraints(self): + def test_conv2d_constraints(self): k_constraint = lambda x: x b_constraint = lambda x: x @@ -166,51 +170,34 @@ class Conv2DTest(test.TestCase): self.assertEqual(layer.kernel.constraint, k_constraint) self.assertEqual(layer.bias.constraint, b_constraint) - def test_dilated_conv_2d(self): - num_samples = 2 - filters = 2 - stack_size = 3 - kernel_size = (3, 2) - num_row = 7 - num_col = 6 - - # Test dilation - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.Conv2D, - kwargs={ - 'filters': filters, - 'kernel_size': kernel_size, - 'dilation_rate': (2, 2) - }, - input_shape=(num_samples, num_row, num_col, stack_size)) - class Conv2DTransposeTest(test.TestCase): - def test_conv2d_transpose(self): + def _run_test(self, kwargs, arg, values): num_samples 
= 2 - filters = 2 stack_size = 3 - num_row = 5 + num_row = 7 num_col = 6 - for padding in ['valid', 'same']: - for strides in [(1, 1), (2, 2)]: - if padding == 'same' and strides != (1, 1): - continue + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.Conv2DTranspose, + kwargs=test_kwargs, + input_shape=(num_samples, num_row, num_col, stack_size)) - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.Conv2DTranspose, - kwargs={ - 'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'strides': strides, - 'data_format': 'channels_last' - }, - input_shape=(num_samples, num_row, num_col, stack_size)) + def test_conv2dtranspose(self): + kwargs = { + 'filters': 2, + 'kernel_size': (3, 3), + } + + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [(2, 2)]) + if test.is_gpu_available(cuda_only=True): + self._run_test(kwargs, 'data_format', ['channels_first']) def test_conv2dtranspose_regularizers(self): kwargs = { @@ -250,30 +237,32 @@ class Conv2DTransposeTest(test.TestCase): class Conv3DTransposeTest(test.TestCase): - def test_conv3d_transpose(self): + def _run_test(self, kwargs, arg, values): num_samples = 2 - filters = 2 stack_size = 3 - num_row = 5 + num_row = 7 num_col = 6 - depth = 4 + depth = 5 - for padding in ['valid', 'same']: - for strides in [(1, 1, 1), (2, 2, 2)]: - if padding == 'same' and strides != (1, 1, 1): - continue + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.Conv3DTranspose, + kwargs=test_kwargs, + input_shape=(num_samples, depth, num_row, num_col, stack_size)) - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.Conv3DTranspose, - kwargs={ - 'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 
'strides': strides, - 'data_format': 'channels_last' - }, - input_shape=(num_samples, depth, num_row, num_col, stack_size)) + def test_conv3dtranspose(self): + kwargs = { + 'filters': 2, + 'kernel_size': (3, 3, 3), + } + + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [(2, 2, 2)]) + if test.is_gpu_available(cuda_only=True): + self._run_test(kwargs, 'data_format', ['channels_first']) def test_conv3dtranspose_regularizers(self): kwargs = { @@ -313,29 +302,37 @@ class Conv3DTransposeTest(test.TestCase): class SeparableConv1DTest(test.TestCase): - def test_separable_conv_1d(self): + def _run_test(self, kwargs, arg, values): num_samples = 2 - filters = 6 stack_size = 3 length = 7 - strides = 1 - for padding in ['valid', 'same']: - for multiplier in [1, 2]: - if padding == 'same' and strides != 1: - continue + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.SeparableConv1D, + kwargs=test_kwargs, + input_shape=(num_samples, length, stack_size)) - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.SeparableConv1D, - kwargs={ - 'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'strides': strides, - 'depth_multiplier': multiplier - }, - input_shape=(num_samples, length, stack_size)) + def test_separable_conv1d(self): + kwargs = { + 'filters': 2, + 'kernel_size': 3, + } + + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [2]) + self._run_test(kwargs, 'dilation_rate', [2]) + self._run_test(kwargs, 'depth_multiplier', [2]) + + kwargs = { + 'filters': 2, + 'kernel_size': 3, + 'padding': 'same', + } + self._run_test(kwargs, 'dilation_rate', [2]) def test_separable_conv1d_regularizers(self): kwargs = { @@ -379,30 +376,40 @@ class SeparableConv1DTest(test.TestCase): class SeparableConv2DTest(test.TestCase): - def 
test_separable_conv_2d(self): + def _run_test(self, kwargs, arg, values): num_samples = 2 - filters = 6 stack_size = 3 num_row = 7 num_col = 6 - for padding in ['valid', 'same']: - for strides in [(1, 1), (2, 2)]: - for multiplier in [1, 2]: - if padding == 'same' and strides != (1, 1): - continue + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.SeparableConv2D, + kwargs=test_kwargs, + input_shape=(num_samples, num_row, num_col, stack_size)) - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.SeparableConv2D, - kwargs={ - 'filters': filters, - 'kernel_size': (3, 3), - 'padding': padding, - 'strides': strides, - 'depth_multiplier': multiplier - }, - input_shape=(num_samples, num_row, num_col, stack_size)) + def test_separable_conv2d(self): + kwargs = { + 'filters': 2, + 'kernel_size': 3, + } + + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [2]) + if test.is_gpu_available(cuda_only=True): + self._run_test(kwargs, 'data_format', ['channels_first']) + self._run_test(kwargs, 'dilation_rate', [2]) + self._run_test(kwargs, 'depth_multiplier', [2]) + + kwargs = { + 'filters': 2, + 'kernel_size': 3, + 'padding': 'same', + } + self._run_test(kwargs, 'dilation_rate', [2]) def test_separable_conv2d_regularizers(self): kwargs = { @@ -446,33 +453,35 @@ class SeparableConv2DTest(test.TestCase): class Conv3DTest(test.TestCase): - def test_convolution_3d(self): + def _run_test(self, kwargs, arg, values): num_samples = 2 - filters = 2 stack_size = 3 + num_row = 7 + num_col = 6 + depth = 5 - input_len_dim1 = 9 - input_len_dim2 = 8 - input_len_dim3 = 8 + test_kwargs = copy.copy(kwargs) + for value in values: + test_kwargs[arg] = value + with self.test_session(use_gpu=True): + testing_utils.layer_test( + keras.layers.Conv3D, + kwargs=test_kwargs, + input_shape=(num_samples, depth, num_row, 
num_col, stack_size)) - for padding in ['valid', 'same']: - for strides in [(1, 1, 1), (2, 2, 2)]: - if padding == 'same' and strides != (1, 1, 1): - continue + def test_conv3d(self): + kwargs = { + 'filters': 2, + 'kernel_size': (3, 3, 3), + } - with self.test_session(use_gpu=True): - testing_utils.layer_test( - keras.layers.Convolution3D, - kwargs={ - 'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'strides': strides - }, - input_shape=(num_samples, input_len_dim1, input_len_dim2, - input_len_dim3, stack_size)) - - def test_convolution_3d_regularizers(self): + self._run_test(kwargs, 'padding', ['valid', 'same']) + self._run_test(kwargs, 'strides', [(2, 2, 2)]) + self._run_test(kwargs, 'dilation_rate', [(2, 2, 2)]) + if test.is_gpu_available(cuda_only=True): + self._run_test(kwargs, 'data_format', ['channels_first']) + + def test_conv3d_regularizers(self): kwargs = { 'filters': 3, 'kernel_size': 3, @@ -490,7 +499,7 @@ class Conv3DTest(test.TestCase): layer(keras.backend.variable(np.ones((1, 5, 5, 5, 2)))) self.assertEqual(len(layer.losses), 3) - def test_convolution_3d_constraints(self): + def test_conv3d_constraints(self): k_constraint = lambda x: x b_constraint = lambda x: x diff --git a/tensorflow/python/keras/_impl/keras/testing_utils.py b/tensorflow/python/keras/_impl/keras/testing_utils.py index b889e311b3..fa1ee2fa3d 100644 --- a/tensorflow/python/keras/_impl/keras/testing_utils.py +++ b/tensorflow/python/keras/_impl/keras/testing_utils.py @@ -105,8 +105,14 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None, # test in functional API x = keras.layers.Input(shape=input_shape[1:], dtype=input_dtype) y = layer(x) - assert keras.backend.dtype(y) == expected_output_dtype - + if keras.backend.dtype(y) != expected_output_dtype: + raise AssertionError('When testing layer %s, for input %s, found output ' + 'dtype=%s but expected to find %s.\nFull kwargs: %s' % + (layer_cls.__name__, + x, + keras.backend.dtype(y), + 
expected_output_dtype, + kwargs)) # check shape inference model = keras.models.Model(x, y) expected_output_shape = tuple( @@ -117,7 +123,15 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None, for expected_dim, actual_dim in zip(expected_output_shape, actual_output_shape): if expected_dim is not None: - assert expected_dim == actual_dim + if expected_dim != actual_dim: + raise AssertionError( + 'When testing layer %s, for input %s, found output_shape=' + '%s but expected to find %s.\nFull kwargs: %s' % + (layer_cls.__name__, + x, + actual_output_shape, + expected_output_shape, + kwargs)) if expected_output is not None: np.testing.assert_allclose(actual_output, expected_output, rtol=1e-3) @@ -146,7 +160,15 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None, for expected_dim, actual_dim in zip(expected_output_shape, actual_output_shape): if expected_dim is not None: - assert expected_dim == actual_dim + if expected_dim != actual_dim: + raise AssertionError( + 'When testing layer %s, for input %s, found output_shape=' + '%s but expected to find %s.\nFull kwargs: %s' % + (layer_cls.__name__, + x, + actual_output_shape, + expected_output_shape, + kwargs)) if expected_output is not None: np.testing.assert_allclose(actual_output, expected_output, rtol=1e-3) diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 689046fe78..bb10fe5e8b 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -1096,10 +1096,10 @@ class SeparableConv1D(_SeparableConv): def call(self, inputs): if self.data_format == 'channels_last': - strides = (1, 1) + self.strides + (1,) + strides = (1,) + self.strides * 2 + (1,) spatial_start_dim = 1 else: - strides = (1, 1, 1) + self.strides + strides = (1, 1) + self.strides * 2 spatial_start_dim = 2 # Explicitly broadcast inputs and kernels to 4D. 
diff --git a/tensorflow/python/layers/network.py b/tensorflow/python/layers/network.py index eeb3276f0c..9f16559687 100644 --- a/tensorflow/python/layers/network.py +++ b/tensorflow/python/layers/network.py @@ -977,7 +977,7 @@ class GraphNetwork(base.Layer): if context.in_graph_mode(): if layer.activity_regularizer is not None: regularization_losses = [ - layer.activity_regularizer(x) for x in computed_tensors + layer.activity_regularizer(x) for x in output_tensors ] # Apply activity regularizer if any: layer.add_loss(regularization_losses, computed_tensors) -- GitLab From 72bd433b9b6b06ae13893015361079dda992d3c8 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Thu, 15 Feb 2018 18:55:22 -0800 Subject: [PATCH 0092/2939] Add a new tag no_cuda_on_cpu_tap for excluding failing non-gpu cuda tests. PiperOrigin-RevId: 185937687 --- tensorflow/core/debug/BUILD | 5 ++++- tensorflow/core/grappler/clusters/BUILD | 5 ++++- tensorflow/core/kernels/BUILD | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD index a32badef6d..40cb8353cd 100644 --- a/tensorflow/core/debug/BUILD +++ b/tensorflow/core/debug/BUILD @@ -196,7 +196,10 @@ tf_cc_test( srcs = ["debug_gateway_test.cc"], args = ["--heap_check=local"], linkstatic = tf_kernel_tests_linkstatic(), - tags = ["no_gpu"], + tags = [ + "no_cuda_on_cpu_tap", + "no_gpu", + ], deps = [ ":debug", ":debug_gateway_internal", diff --git a/tensorflow/core/grappler/clusters/BUILD b/tensorflow/core/grappler/clusters/BUILD index 5b8ce373bc..b8f8e13c9a 100644 --- a/tensorflow/core/grappler/clusters/BUILD +++ b/tensorflow/core/grappler/clusters/BUILD @@ -114,7 +114,10 @@ tf_cc_test( name = "single_machine_test", srcs = ["single_machine_test.cc"], args = ["--heap_check=local"], # The GPU tracer leaks memory - tags = ["no_gpu"], + tags = [ + "no_cuda_on_cpu_tap", + "no_gpu", + ], deps = [ ":single_machine", "//tensorflow/cc:cc_ops", diff --git a/tensorflow/core/kernels/BUILD 
b/tensorflow/core/kernels/BUILD index cee3c55d1a..dc93c76eae 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -987,6 +987,7 @@ tf_cuda_cc_test( name = "constant_op_test", size = "small", srcs = ["constant_op_test.cc"], + tags = ["no_cuda_on_cpu_tap"], deps = [ ":constant_op", ":ops_testutil", -- GitLab From c6cd20dbcaaa601977d1b63ab17e04d137de5133 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Thu, 15 Feb 2018 19:01:57 -0800 Subject: [PATCH 0093/2939] Add node converter for FusedBatchNorm op --- .../contrib/tensorrt/convert/convert_graph.cc | 9 ++- .../contrib/tensorrt/convert/convert_nodes.cc | 67 +++++++++++++++++++ 2 files changed, 73 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 31ba30b2d9..8c0aada355 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -68,9 +68,12 @@ bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { "Mean", "AvgPool", "ConcatV2", - "DepthwiseConv2dNative" //, "MatMul", - //"Reshape" - // TODO(ben,jie): ... + "DepthwiseConv2dNative", + "FusedBatchNorm", + "FusedBatchNormV2", + //, "MatMul", + //"Reshape" + // TODO(ben,jie): ... 
}; // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h) return candidate_ops.count(node_def.op()); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index ea0eb480f2..e3b16126f1 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -276,6 +276,17 @@ template <> tensorflow::DataType TFAttrs::get(string key) const { return this->at(key)->type(); } + +template <> +float TFAttrs::get(string key) const { + return this->at(key)->f(); +} + +template <> +bool TFAttrs::get(string key) const { + return this->at(key)->b(); +} + // TODO(jie): reorder4 & reorder2 should be merged? template void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, @@ -1703,6 +1714,60 @@ tensorflow::Status ConvertConcat(Converter& ctx, return tensorflow::Status::OK(); } +tensorflow::Status ConvertFusedBatchNorm(Converter& ctx, + tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { + TFAttrs attrs(node_def); + float epsilon = attrs.get("epsilon"); + auto data_format = attrs.get("data_format"); + if (data_format != "NCHW" ) { + return tensorflow::errors::Unimplemented( + "only data_format=NCHW is supported, at " + node_def.name()); + } + bool is_training = attrs.get("is_training"); + if (is_training) { + return tensorflow::errors::Unimplemented( + "only is_training=false is supported, at " + node_def.name()); + } + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + TRT_ShapedWeights scale_weights = inputs.at(1).weights(); + TRT_ShapedWeights offset_weights = inputs.at(2).weights(); + TRT_ShapedWeights mean_weights = inputs.at(3).weights(); + TRT_ShapedWeights variance_weights = inputs.at(4).weights(); + TRT_ShapedWeights dummy_power_weights(scale_weights.type_); + TRT_ShapedWeights combined_scale_weights = + ctx.get_temp_weights_like(scale_weights); + TRT_ShapedWeights 
combined_offset_weights = + ctx.get_temp_weights_like(offset_weights); + size_t nweight = scale_weights.count(); + if (scale_weights.type_ != tensorflow::DataType::DT_FLOAT || + offset_weights.type_ != tensorflow::DataType::DT_FLOAT || + mean_weights.type_ != tensorflow::DataType::DT_FLOAT || + variance_weights.type_ != tensorflow::DataType::DT_FLOAT) { + return tensorflow::errors::Unimplemented( + "only float32 weights data type is supported, at " + node_def.name()); + } + for (size_t i=0; i(scale_weights.GetValues()))[i]; + float offset = (static_cast(offset_weights.GetValues()))[i]; + float mean = (static_cast(mean_weights.GetValues()))[i]; + float variance = (static_cast(variance_weights.GetValues()))[i]; + float& combined_scale_ref = const_cast( + static_cast(combined_scale_weights.GetValues()))[i]; + float& combined_offset_ref = const_cast( + static_cast(combined_offset_weights.GetValues()))[i]; + combined_scale_ref = scale / sqrtf(variance + epsilon); + combined_offset_ref = offset - mean * combined_scale_ref; + } + nvinfer1::IScaleLayer* layer = ctx.network()->addScale( + *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, + combined_offset_weights, combined_scale_weights, dummy_power_weights); + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + tensorflow::Status ConvertMatMul(Converter& ctx, tensorflow::NodeDef const& node_def, std::vector const& inputs, @@ -1827,6 +1892,8 @@ void Converter::register_op_converters() { op_registry_["ConcatV2"] = ConvertConcat; op_registry_["MatMul"] = ConvertMatMul; op_registry_["Reshape"] = ConvertReshape; + op_registry_["FusedBatchNorm"] = ConvertFusedBatchNorm; + op_registry_["FusedBatchNormV2"] = ConvertFusedBatchNorm; } } // namespace -- GitLab From 98cf337e781977fd464c574656699b3181eddf19 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Thu, 15 Feb 2018 19:12:05 -0800 Subject: [PATCH 0094/2939] TFE SPINN example: 
use tensor instead of numpy array in inference output. PiperOrigin-RevId: 185939805 --- .../contrib/eager/python/examples/spinn/spinn_test.py | 2 +- third_party/examples/eager/spinn/README.md | 4 ++-- third_party/examples/eager/spinn/spinn.py | 7 +++---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py index eefc06d90d..081b0af14f 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py +++ b/tensorflow/contrib/eager/python/examples/spinn/spinn_test.py @@ -369,7 +369,7 @@ class SpinnTest(test_util.TensorFlowTestCase): inference_sentences=("( foo ( bar . ) )", "( bar ( foo . ) )")) logits = spinn.train_or_infer_spinn( embed, word2index, None, None, None, config) - self.assertEqual(np.float32, logits.dtype) + self.assertEqual(tf.float32, logits.dtype) self.assertEqual((3,), logits.shape) def testInferSpinnThrowsErrorIfOnlyOneSentenceIsSpecified(self): diff --git a/third_party/examples/eager/spinn/README.md b/third_party/examples/eager/spinn/README.md index 335c0fa3b5..7f477d1920 100644 --- a/third_party/examples/eager/spinn/README.md +++ b/third_party/examples/eager/spinn/README.md @@ -75,7 +75,7 @@ Other eager execution examples can be found under [tensorflow/contrib/eager/pyth should all be separated by spaces. For instance, ```bash - pythons spinn.py --data_root /tmp/spinn-data --logdir /tmp/spinn-logs \ + python spinn.py --data_root /tmp/spinn-data --logdir /tmp/spinn-logs \ --inference_premise '( ( The dog ) ( ( is running ) . ) )' \ --inference_hypothesis '( ( The dog ) ( moves . 
) )' ``` @@ -93,7 +93,7 @@ Other eager execution examples can be found under [tensorflow/contrib/eager/pyth By contrast, the following sentence pair: ```bash - pythons spinn.py --data_root /tmp/spinn-data --logdir /tmp/spinn-logs \ + python spinn.py --data_root /tmp/spinn-data --logdir /tmp/spinn-logs \ --inference_premise '( ( The dog ) ( ( is running ) . ) )' \ --inference_hypothesis '( ( The dog ) ( rests . ) )' ``` diff --git a/third_party/examples/eager/spinn/spinn.py b/third_party/examples/eager/spinn/spinn.py index 38ba48d501..8a1c7db2ea 100644 --- a/third_party/examples/eager/spinn/spinn.py +++ b/third_party/examples/eager/spinn/spinn.py @@ -44,7 +44,6 @@ import os import sys import time -import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf @@ -567,7 +566,7 @@ def train_or_infer_spinn(embed, Returns: If `config.inference_premise ` and `config.inference_hypothesis` are not `None`, i.e., inference mode: the logits for the possible labels of the - SNLI data set, as numpy array of three floats. + SNLI data set, as a `Tensor` of three floats. else: The trainer object. 
Raises: @@ -626,8 +625,8 @@ def train_or_infer_spinn(embed, inference_logits = model( # pylint: disable=not-callable tf.constant(prem), tf.constant(prem_trans), tf.constant(hypo), tf.constant(hypo_trans), training=False) - inference_logits = np.array(inference_logits[0][1:]) - max_index = np.argmax(inference_logits) + inference_logits = inference_logits[0][1:] + max_index = tf.argmax(inference_logits) print("\nInference logits:") for i, (label, logit) in enumerate( zip(data.POSSIBLE_LABELS, inference_logits)): -- GitLab From 4de211808b81a2af42a38b011a19ab5ef67795bc Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 15 Feb 2018 19:31:11 -0800 Subject: [PATCH 0095/2939] Error out when building XLA's CPU and GPU backends with fast-math In an ideal world this won't make a difference since the compiler should be disciplined about not leaking host-level optimization artifacts into generated code. However, I think this provides some defense-in-depth in preventing fast-math optimization on the host side from messing up floating point constants etc. we want to embed into generated code. PiperOrigin-RevId: 185941549 --- tensorflow/compiler/xla/service/llvm_compiler.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/compiler/xla/service/llvm_compiler.cc b/tensorflow/compiler/xla/service/llvm_compiler.cc index f98fc0400a..68c35c0c1f 100644 --- a/tensorflow/compiler/xla/service/llvm_compiler.cc +++ b/tensorflow/compiler/xla/service/llvm_compiler.cc @@ -15,6 +15,10 @@ limitations under the License. #include "tensorflow/compiler/xla/service/llvm_compiler.h" +#ifdef __FAST_MATH__ +#error "Don't build XLA with -ffast-math" +#endif + namespace xla { StatusOr>> LLVMCompiler::Compile( std::vector> modules, -- GitLab From 480fbfda31e4b1fc10d537264a0c9f1c9c5994f4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 19:34:18 -0800 Subject: [PATCH 0096/2939] Add tuple targets to the context handling mechanism in templates. 
PiperOrigin-RevId: 185941851 --- tensorflow/contrib/py2tf/pyct/templates.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/contrib/py2tf/pyct/templates.py b/tensorflow/contrib/py2tf/pyct/templates.py index c40e4d0fb7..6ee6c0c5ce 100644 --- a/tensorflow/contrib/py2tf/pyct/templates.py +++ b/tensorflow/contrib/py2tf/pyct/templates.py @@ -68,6 +68,10 @@ class ReplaceTransformer(gast.NodeTransformer): if isinstance(node, gast.Attribute): self._set_inner_child_context(node.value, ctx) node.ctx = gast.Load() + elif isinstance(node, gast.Tuple): + for e in node.elts: + self._set_inner_child_context(e, ctx) + node.ctx = ctx elif isinstance(node, gast.Name): node.ctx = ctx elif isinstance(node, (gast.Str, gast.Num)): -- GitLab From 5827bdb5bd8f83f0617693bbe2caca253a13c7ed Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 19:47:47 -0800 Subject: [PATCH 0097/2939] Fix handling of types in RNN state import. Sanitize TF node names. PiperOrigin-RevId: 185942921 --- .../contrib/lite/toco/export_tensorflow.cc | 10 +-- .../contrib/lite/toco/import_tensorflow.cc | 2 +- tensorflow/contrib/lite/toco/model.h | 6 +- tensorflow/contrib/lite/toco/toco_tooling.cc | 8 ++- tensorflow/contrib/lite/toco/tooling_util.cc | 64 +++++++++++++++---- tensorflow/contrib/lite/toco/tooling_util.h | 17 +++++ 6 files changed, 85 insertions(+), 22 deletions(-) diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc index 7dc36a6d13..570cc7943b 100644 --- a/tensorflow/contrib/lite/toco/export_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc @@ -1236,8 +1236,9 @@ void ConvertLstmCellOperator(const Model& model, const LstmCellOperator& src_op, // Write weights const string weights_output = base + "weights"; CHECK(model.HasArray(src_op.inputs[LstmCellOperator::WEIGHTS_INPUT])); - const auto& weights_array = - model.GetArray(src_op.inputs[LstmCellOperator::WEIGHTS_INPUT]); + const 
string weights_name = WalkUpToConstantArray( + model, src_op.inputs[LstmCellOperator::WEIGHTS_INPUT]); + const auto& weights_array = model.GetArray(weights_name); // Convert 4D FullyConnected weights into 2D matrix const auto& weights_shape = weights_array.shape(); CHECK_EQ(weights_shape.dimensions_count(), 2); @@ -1262,8 +1263,9 @@ void ConvertLstmCellOperator(const Model& model, const LstmCellOperator& src_op, // Write biases const string biases_output = base + "biases"; CHECK(model.HasArray(src_op.inputs[LstmCellOperator::BIASES_INPUT])); - const auto& bias_array = - model.GetArray(src_op.inputs[LstmCellOperator::BIASES_INPUT]); + const string bias_name = WalkUpToConstantArray( + model, src_op.inputs[LstmCellOperator::BIASES_INPUT]); + const auto& bias_array = model.GetArray(bias_name); // TODO(b/62904716) Bias arrays should be 1-D, and used directly. Shape bias_shape_1d = bias_array.shape(); UnextendShape(&bias_shape_1d, 1); diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 330506200c..9c01b67420 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1582,7 +1582,7 @@ void ConvertFloorDivOperator(const NodeDef& node, void ConvertFloorModOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { - CHECK(node.op() == "FloorMod"); + CHECK_EQ(node.op(), "FloorMod"); CheckInputsCount(node, tf_import_flags, 2); auto* op = new FloorModOperator; op->inputs.push_back(node.input(0)); diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 2bcd6da3da..c55bf664f8 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -160,17 +160,17 @@ enum class AxesOrder { // may be involved only in debug-only subgraphs that we may not be interested // in actually supporting). 
enum class ArrayDataType { - kNone, + kNone, // 0 kBool, kFloat, kInt8, kUint8, - kInt16, + kInt16, // 5 kUint16, kInt32, kUint32, kInt64, - kUint64, + kUint64, // 10 kString }; diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index 864c646a8c..1b836fbc15 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -189,6 +189,11 @@ std::unique_ptr Import(const TocoFlags& toco_flags, } void Transform(const TocoFlags& toco_flags, Model* model) { + // Clean up after import. + SetFinalDataTypeOnInputs(toco_flags, model); + UseArraysExtraInfo(model); + FinishBuildingRNNStates(model); + const FileFormat output_format = toco_flags.output_format(); const IODataType inference_type = toco_flags.inference_type(); @@ -200,9 +205,6 @@ void Transform(const TocoFlags& toco_flags, Model* model) { << "Quantized inference is not allowed with float inputs."; } - SetFinalDataTypeOnInputs(toco_flags, model); - UseArraysExtraInfo(model); - // Remove unused ops before performing any other optimizations. This is to // stop optimizations from crossing the input/output boundaries. For example // this will stop BatchNorm fusing if the output node is in between a conv diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 249c03ca3c..dcb409c84d 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -25,6 +25,7 @@ limitations under the License. 
#include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "absl/strings/str_replace.h" +#include "absl/strings/str_split.h" #include "tensorflow/contrib/lite/toco/dump_graphviz.h" #include "tensorflow/contrib/lite/toco/model_flags.pb.h" #include "tensorflow/contrib/lite/toco/toco_graphviz_dump_options.h" @@ -633,6 +634,14 @@ bool IsConstantParameterArray(const Model& model, const string& name) { } namespace { +// Take an array name, which may be something like "name:3_5" and make it +// acceptable as a TF node name, say "name_3_5"; +string SanitizeNameForTFNode(const string& array_name) { + auto node_name = array_name; + std::replace(node_name.begin(), node_name.end(), ':', '_'); + return node_name; +} + void CheckInputArraysAreNotOutputArrays(const ModelFlags& model_flags) { for (const auto& input_array : model_flags.input_arrays()) { for (const string& output_array : model_flags.output_arrays()) { @@ -796,7 +805,10 @@ void FixNoOrphanedArray(Model* model) { } } -void CheckArrayFieldsConsistent(const Model& model) { +// Apply checks to arrays individually (for-each fashion). +// +// Check consistency of array fields, check name. +void CheckEachArray(const Model& model) { for (const auto& array_entry : model.GetArrayMap()) { const auto& array = array_entry.second; if (array->has_shape()) { @@ -811,6 +823,18 @@ void CheckArrayFieldsConsistent(const Model& model) { if (array->buffer) { CHECK(array->buffer->type == array->data_type); } + + // Check name. Either "name_with_suffix_8", "name_with_port:3", but not + // "name_with_both:3_8". 
+ const string& name = array_entry.first; + auto colon_pos = name.find_first_of(":"); + if (colon_pos != string::npos) { + CHECK_EQ(name.substr(colon_pos + 1).find_first_not_of("0123456789"), + string::npos) + << "Array name must only have digits after colon"; + } + CHECK_GT(colon_pos, 0) + << "First character of array name must not be a colon."; } } @@ -959,7 +983,7 @@ void CheckInvariants(const Model& model) { CheckNonAsciiIOArrays(model.flags); CheckNoMissingArray(model); CheckNoOrphanedArray(model); - CheckArrayFieldsConsistent(model); + CheckEachArray(model); CheckOperatorOrdering(model); } @@ -1051,9 +1075,6 @@ void CreateOrCheckRnnStateArray(const string& name, int size, Model* model) { if (array.has_shape()) { num_dims = array.shape().dimensions_count(); } - CHECK(array.data_type == ArrayDataType::kFloat || - array.data_type == ArrayDataType::kNone); - array.data_type = ArrayDataType::kFloat; if (!array.has_shape() && num_dims >= 0) { Shape* shape = array.mutable_shape(); std::vector dims; @@ -1077,7 +1098,7 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { } } if (!dst_input_array) { - // specified_input_array from model_flags is not found in model->flags. + // Specified_input_array from model_flags is not found in model->flags. // Match a name-less specified input array when there can be no ambiguity // as there is only 1 input array. 
if (model->flags.input_arrays_size() == 1 && @@ -1384,19 +1405,23 @@ bool IsAllocatableTransientArray(const Model& model, const string& array_name) { } string AvailableArrayName(const Model& model, const string& name) { - if (!model.HasArray(name) && !model.IsOptionalArray(name)) { - return name; + string sanitized_name = SanitizeNameForTFNode(name); + if (!model.HasArray(sanitized_name) && + !model.IsOptionalArray(sanitized_name)) { + return sanitized_name; } const int kNumSuffixesToTry = 1000; for (int i = 0; i < kNumSuffixesToTry; i++) { - const string& name_with_suffix = toco::port::StringF("%s_%d", name, i); + const string& name_with_suffix = + toco::port::StringF("%s_%d", sanitized_name, i); if (!model.HasArray(name_with_suffix) && !model.IsOptionalArray(name_with_suffix)) { return name_with_suffix; } } - LOG(FATAL) << "Could not find an available array name starting with " << name - << ". Tried " << kNumSuffixesToTry << " suffixes, all were taken!"; + LOG(FATAL) << "Could not find an available array name starting with " + << sanitized_name << ". 
Tried " << kNumSuffixesToTry + << " suffixes, all were taken!"; return ""; } @@ -1795,6 +1820,23 @@ ArrayDataType ConvertIODataTypeToArrayDataType(IODataType type) { } } +void FinishBuildingRNNStates(Model* model) { + for (const auto& rnn_state : model->flags.rnn_states()) { + if (!model->HasArray(rnn_state.back_edge_source_array()) || + !model->HasArray(rnn_state.state_array())) { + CHECK(model->HasArray(rnn_state.back_edge_source_array())); + CHECK(model->HasArray(rnn_state.state_array())); + continue; + } + const auto& src_array = model->GetArray(rnn_state.back_edge_source_array()); + auto& dst_array = model->GetArray(rnn_state.state_array()); + if (src_array.data_type == ArrayDataType::kNone && + dst_array.data_type == ArrayDataType::kNone) { + dst_array.data_type = ArrayDataType::kFloat; + } + } +} + void UseArraysExtraInfo(Model* model) { for (const auto& entry : model->flags.arrays_extra_info().entries()) { QCHECK(model->HasArray(entry.name())) diff --git a/tensorflow/contrib/lite/toco/tooling_util.h b/tensorflow/contrib/lite/toco/tooling_util.h index a2dde09156..0aaa0f6a21 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.h +++ b/tensorflow/contrib/lite/toco/tooling_util.h @@ -299,6 +299,23 @@ void CheckFinalDataTypesSatisfied(const Model& model); ArrayDataType ConvertIODataTypeToArrayDataType(IODataType type); +// The process of building models varies according to the import format. +// +// (a) In some cases, such as model-proto format, the model should be fully +// specified. In these cases, no extra action should be taken by this function. +// (b) In other cases, such as TF graphdef format, the desired types of RNN +// arrays are not specified directly in the model, neither can they be inferred. +// However, we can set the types of RNN destination arrays to float. This breaks +// any cycles such as when resolution of the type of an RNN source array depends +// on the type of its destination array. 
+// +// This function is applied after the main import, after resolution of flags and +// after application of ArraysExtraInfo. It only defaults destination RNN arrays +// to float. If the model is subsequently quantized, it is assumed that the +// model contains sufficient information for that to be completed. If it is +// already quantized, then case (a) should hold. +void FinishBuildingRNNStates(Model* model); + void UseArraysExtraInfo(Model* model); } // namespace toco -- GitLab From 018980f7aa02d023ad4574fcb92b8be7ff3cfbbb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 15 Feb 2018 19:52:03 -0800 Subject: [PATCH 0098/2939] optimized quantized softmax PiperOrigin-RevId: 185943132 --- .../internal/optimized/optimized_ops.h | 237 +++++++++++++----- 1 file changed, 178 insertions(+), 59 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index cd52385f41..7af07e5d0c 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -2866,74 +2866,193 @@ inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, using FixedPointAccum = gemmlowp::FixedPoint; using FixedPoint0 = gemmlowp::FixedPoint; - gemmlowp::ScopedProfilingLabel label("Softmax"); + gemmlowp::ScopedProfilingLabel label("Softmax/8bit"); const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); const int height = MatchingArraySize(input_dims, 2, output_dims, 2); const int width = MatchingArraySize(input_dims, 1, output_dims, 1); const int depth = MatchingArraySize(input_dims, 0, output_dims, 0); - for (int b = 0; b < batches; ++b) { - for (int x = 0; x < width; ++x) { - for (int y = 0; y < height; ++y) { - uint8 max_in_row = 0; - for (int c = 0; c < depth; ++c) { - max_in_row = - std::max(max_in_row, input_data[Offset(input_dims, c, x, y, b)]); - } + const int 
outer_size = batches * height * width; - FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); - for (int c = 0; c < depth; ++c) { - int32 input_diff = - static_cast(input_data[Offset(input_dims, c, x, y, b)]) - - max_in_row; - if (input_diff >= diff_min) { - const int32 input_diff_rescaled = - MultiplyByQuantizedMultiplierGreaterThanOne( - input_diff, input_beta_multiplier, input_beta_left_shift); - const FixedPointScaledDiff scaled_diff_f8 = - FixedPointScaledDiff::FromRaw(input_diff_rescaled); - sum_of_exps = - sum_of_exps + gemmlowp::Rescale( - exp_on_negative_values(scaled_diff_f8)); - } + for (int b = 0; b < outer_size; ++b) { + const uint8* input_data_ptr = input_data + b * depth; + uint8* output_data_ptr = output_data + b * depth; + + // Determine the largest entry in the current row + uint8 max_in_row = 0; + { + int c = 0; +#ifdef USE_NEON + uint8x16_t max16_0 = vdupq_n_u8(0); + uint8x16_t max16_1 = vdupq_n_u8(0); + for (; c <= depth - 32; c += 32) { + max16_0 = vmaxq_u8(max16_0, vld1q_u8(input_data_ptr + c + 0)); + max16_1 = vmaxq_u8(max16_1, vld1q_u8(input_data_ptr + c + 16)); + } + uint8x16_t max16 = vmaxq_u8(max16_0, max16_1); + if (c <= depth - 16) { + max16 = vmaxq_u8(max16, vld1q_u8(input_data_ptr + c)); + c += 16; + } + uint8x8_t max8 = vmax_u8(vget_low_u8(max16), vget_high_u8(max16)); + if (c <= depth - 8) { + max8 = vmax_u8(max8, vld1_u8(input_data_ptr + c)); + c += 8; + } + uint8x8_t max4 = vmax_u8(max8, vext_u8(max8, max8, 4)); + uint8x8_t max2 = vmax_u8(max4, vext_u8(max4, max4, 2)); + uint8x8_t max1 = vpmax_u8(max2, max2); + max_in_row = vget_lane_u8(max1, 0); +#endif + for (; c < depth; ++c) { + max_in_row = std::max(max_in_row, input_data_ptr[c]); + } + } + +#ifdef USE_NEON + using FixedPointAccumInt32x4 = + gemmlowp::FixedPoint; + using FixedPointScaledDiffInt32x4 = + gemmlowp::FixedPoint; + using FixedPoint0Int32x4 = gemmlowp::FixedPoint; + FixedPoint0Int32x4 input_beta_multiplier_f0 = + 
FixedPoint0Int32x4::FromScalarRaw(input_beta_multiplier); + int16x8_t max_in_row_s16 = vdupq_n_s16(max_in_row); +#endif + + // Compute the sum of exponentials of the differences of entries in the + // current row from the largest entry in the current row. + FixedPointAccum sum_of_exps = FixedPointAccum::Zero(); + { + int c = 0; +#ifdef USE_NEON + int32x4_t diff_min_s32 = vdupq_n_s32(diff_min); + FixedPointAccumInt32x4 sum_of_exps_0 = FixedPointAccumInt32x4::Zero(); + FixedPointAccumInt32x4 sum_of_exps_1 = FixedPointAccumInt32x4::Zero(); + FixedPointAccumInt32x4 zeros = FixedPointAccumInt32x4::Zero(); + for (; c <= depth - 8; c += 8) { + uint16x8_t input_u16 = vmovl_u8(vld1_u8(input_data_ptr + c)); + int16x8_t input_diff_s16 = + vsubq_s16(vreinterpretq_s16_u16(input_u16), max_in_row_s16); + int32x4_t input_diff_s32_0 = vmovl_s16(vget_low_s16(input_diff_s16)); + int32x4_t input_diff_s32_1 = vmovl_s16(vget_high_s16(input_diff_s16)); + int32x4_t mask_0 = vcgeq_s32(input_diff_s32_0, diff_min_s32); + int32x4_t mask_1 = vcgeq_s32(input_diff_s32_1, diff_min_s32); + FixedPointScaledDiffInt32x4 scaled_diff_0 = + input_beta_multiplier_f0 * + FixedPointScaledDiffInt32x4::FromRaw( + gemmlowp::ShiftLeft(input_diff_s32_0, input_beta_left_shift)); + FixedPointScaledDiffInt32x4 scaled_diff_1 = + input_beta_multiplier_f0 * + FixedPointScaledDiffInt32x4::FromRaw( + gemmlowp::ShiftLeft(input_diff_s32_1, input_beta_left_shift)); + FixedPointAccumInt32x4 exps_0 = + gemmlowp::Rescale( + exp_on_negative_values(scaled_diff_0)); + FixedPointAccumInt32x4 exps_1 = + gemmlowp::Rescale( + exp_on_negative_values(scaled_diff_1)); + FixedPointAccumInt32x4 masked_exps_0 = + SelectUsingMask(mask_0, exps_0, zeros); + FixedPointAccumInt32x4 masked_exps_1 = + SelectUsingMask(mask_1, exps_1, zeros); + sum_of_exps_0 = sum_of_exps_0 + masked_exps_0; + sum_of_exps_1 = sum_of_exps_1 + masked_exps_1; + } + int32x4_t sum_of_exps_reduced_4 = (sum_of_exps_0 + sum_of_exps_1).raw(); + int32x2_t 
sum_of_exps_reduced_2 = + vadd_s32(vget_low_s32(sum_of_exps_reduced_4), + vget_high_s32(sum_of_exps_reduced_4)); + int32x2_t sum_of_exps_reduced_1 = + vpadd_s32(sum_of_exps_reduced_2, sum_of_exps_reduced_2); + sum_of_exps = + FixedPointAccum::FromRaw(vget_lane_s32(sum_of_exps_reduced_1, 0)); +#endif + for (; c < depth; ++c) { + int32 input_diff = static_cast(input_data_ptr[c]) - max_in_row; + if (input_diff >= diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); + sum_of_exps = + sum_of_exps + gemmlowp::Rescale( + exp_on_negative_values(scaled_diff_f8)); } + } + } - int32 fixed_sum_of_exps = sum_of_exps.raw(); - // TODO(starka): Use a NEON intrinsic like vclzq_u32 instead. - int headroom_plus_one = - __builtin_clz(static_cast(fixed_sum_of_exps)); - // This is the number of bits to the left of the binary point above 1.0. - // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and - // no later adjustment will be needed. - int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one; - int32 shifted_sum_minus_one = static_cast( - (static_cast(fixed_sum_of_exps) << headroom_plus_one) - - (static_cast(1) << 31)); - - FixedPoint0 shifted_scale = gemmlowp::one_over_one_plus_x_for_x_in_0_1( - FixedPoint0::FromRaw(shifted_sum_minus_one)); + // Compute the fixed-point multiplier and shift that we need to apply to + // perform a division by the above-computed sum-of-exponentials. + int32 fixed_sum_of_exps = sum_of_exps.raw(); + int headroom_plus_one = + __builtin_clz(static_cast(fixed_sum_of_exps)); + // This is the number of bits to the left of the binary point above 1.0. + // Consider fixed_sum_of_exps=1.25. In that case shifted_scale=0.8 and + // no later adjustment will be needed. 
+ int num_bits_over_unit = kAccumulationIntegerBits - headroom_plus_one; + int32 shifted_sum_minus_one = static_cast( + (static_cast(fixed_sum_of_exps) << headroom_plus_one) - + (static_cast(1) << 31)); + FixedPoint0 shifted_scale = gemmlowp::one_over_one_plus_x_for_x_in_0_1( + FixedPoint0::FromRaw(shifted_sum_minus_one)); + + // Compute the quotients of exponentials of differences of entries in the + // current row from the largest entry, over the previously-computed sum of + // exponentials. + { + int c = 0; +#ifdef USE_NEON + int16x8_t diff_min_s16 = vdupq_n_s16(diff_min); + for (; c <= depth - 8; c += 8) { + uint16x8_t input_u16 = vmovl_u8(vld1_u8(input_data_ptr + c)); + int16x8_t input_diff_s16 = + vsubq_s16(vreinterpretq_s16_u16(input_u16), max_in_row_s16); + int32x4_t input_diff_s32_0 = vmovl_s16(vget_low_s16(input_diff_s16)); + int32x4_t input_diff_s32_1 = vmovl_s16(vget_high_s16(input_diff_s16)); + uint8x8_t mask = vmovn_u16( + vreinterpretq_u16_s16(vcgeq_s16(input_diff_s16, diff_min_s16))); + FixedPointScaledDiffInt32x4 scaled_diff_0 = + input_beta_multiplier_f0 * + FixedPointScaledDiffInt32x4::FromRaw( + gemmlowp::ShiftLeft(input_diff_s32_0, input_beta_left_shift)); + FixedPointScaledDiffInt32x4 scaled_diff_1 = + input_beta_multiplier_f0 * + FixedPointScaledDiffInt32x4::FromRaw( + gemmlowp::ShiftLeft(input_diff_s32_1, input_beta_left_shift)); + FixedPoint0Int32x4 exp_0 = exp_on_negative_values(scaled_diff_0); + FixedPoint0Int32x4 exp_1 = exp_on_negative_values(scaled_diff_1); + int32x4_t output_s32_0 = gemmlowp::RoundingDivideByPOT( + vqrdmulhq_n_s32(exp_0.raw(), shifted_scale.raw()), + num_bits_over_unit + 31 - 8); + int32x4_t output_s32_1 = gemmlowp::RoundingDivideByPOT( + vqrdmulhq_n_s32(exp_1.raw(), shifted_scale.raw()), + num_bits_over_unit + 31 - 8); + int16x8_t output_s16 = + vcombine_s16(vqmovn_s32(output_s32_0), vqmovn_s32(output_s32_1)); + uint8x8_t output_u8 = vqmovun_s16(output_s16); + uint8x8_t masked_output = vbsl_s16(mask, output_u8, 
vdup_n_u8(0)); + vst1_u8(output_data_ptr + c, masked_output); + } +#endif + for (; c < depth; ++c) { + int32 input_diff = static_cast(input_data_ptr[c]) - max_in_row; + if (input_diff >= diff_min) { + const int32 input_diff_rescaled = + MultiplyByQuantizedMultiplierGreaterThanOne( + input_diff, input_beta_multiplier, input_beta_left_shift); + const FixedPointScaledDiff scaled_diff_f8 = + FixedPointScaledDiff::FromRaw(input_diff_rescaled); - for (int c = 0; c < depth; ++c) { - int32 input_diff = - static_cast(input_data[Offset(input_dims, c, x, y, b)]) - - max_in_row; - if (input_diff >= diff_min) { - const int32 input_diff_rescaled = - MultiplyByQuantizedMultiplierGreaterThanOne( - input_diff, input_beta_multiplier, input_beta_left_shift); - const FixedPointScaledDiff scaled_diff_f8 = - FixedPointScaledDiff::FromRaw(input_diff_rescaled); - - FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8); - int32 unsat_output = gemmlowp::RoundingDivideByPOT( - (shifted_scale * exp_in_0).raw(), num_bits_over_unit + 31 - 8); - - output_data[Offset(output_dims, c, x, y, b)] = - std::max(std::min(unsat_output, 255), 0); - - } else { - output_data[Offset(output_dims, c, x, y, b)] = 0; - } + FixedPoint0 exp_in_0 = exp_on_negative_values(scaled_diff_f8); + int32 unsat_output = gemmlowp::RoundingDivideByPOT( + (shifted_scale * exp_in_0).raw(), num_bits_over_unit + 31 - 8); + + output_data_ptr[c] = std::max(std::min(unsat_output, 255), 0); + + } else { + output_data_ptr[c] = 0; } } } -- GitLab From cae5adce103849287e48a122b203d71600c7a6ff Mon Sep 17 00:00:00 2001 From: Alina Sbirlea Date: Thu, 15 Feb 2018 20:18:11 -0800 Subject: [PATCH 0099/2939] Automated g4 rollback of changelist 185891869 PiperOrigin-RevId: 185944719 --- .../xla/service/algebraic_simplifier.cc | 141 ---------- .../xla/service/algebraic_simplifier_test.cc | 203 --------------- .../compiler/xla/tests/dot_operation_test.cc | 246 ------------------ 3 files changed, 590 deletions(-) diff --git 
a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 6f6c2391f3..fb857559f9 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -284,8 +284,6 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { const Shape& dot_shape, HloInstruction* lhs, int64 lhs_contracting_dim, HloInstruction* rhs, int64 rhs_contracting_dim, bool swapped); - StatusOr OptimizeDotOfGather(HloInstruction* dot); - // Current HloComputation instance the AlgebraicSimplifierVisitor is // traversing. HloComputation* computation_; @@ -919,134 +917,6 @@ StatusOr AlgebraicSimplifierVisitor::OptimizeDotOfConcatHelper( return add_result; } -StatusOr AlgebraicSimplifierVisitor::OptimizeDotOfGather( - HloInstruction* dot) { - const DotDimensionNumbers& dnums = dot->dot_dimension_numbers(); - if (dnums.lhs_contracting_dimensions_size() != 1 || - dnums.rhs_contracting_dimensions_size() != 1 || - dnums.lhs_batch_dimensions_size() != 0 || - dnums.rhs_batch_dimensions_size() != 0 || - dot->shape().dimensions_size() != 2) { // dot output 2D - VLOG(10) << "DotOfGather: Can only optimize 2D, non-batch dot operations."; - return nullptr; - } - - // Optimize either dot(DS(ctA), ctB)) or dot(ctB, DS(ctA)). - // Currently a Gather is a DynamicSlice. - auto is_dynamic_slice_constant_combination = - [](HloInstruction* a, HloInstruction* b, int a_contracting_dimension) { - // First operand is a DynamicSlice(Constant). - if (a->opcode() != HloOpcode::kDynamicSlice) { - return false; - } - auto* dynamic_slice_op = a->operand(0); - if (dynamic_slice_op->opcode() != HloOpcode::kConstant) { - return false; - } - // Second operand is a Constant. - if (b->opcode() != HloOpcode::kConstant) { - return false; - } - // The DynamicSlice output is a vector. 
- const Shape& dynamic_slice_shape = a->shape(); - if (dynamic_slice_shape.dimensions(1 - a_contracting_dimension) != 1) { - return false; - } - // Constant size is the same before and after slice in the contracting - // dimension, otherwise we either must precompute for all possible slice - // indices or dot is invalid. - const Shape& dynamic_slice_op_shape = dynamic_slice_op->shape(); - if (dynamic_slice_op_shape.dimensions(a_contracting_dimension) != - dynamic_slice_shape.dimensions(a_contracting_dimension)) { - return false; - } - return true; - }; - - HloInstruction* lhs = dot->mutable_operand(0); - HloInstruction* rhs = dot->mutable_operand(1); - int lhs_contracting_dimension = dnums.lhs_contracting_dimensions(0); - int rhs_contracting_dimension = dnums.rhs_contracting_dimensions(0); - - if (!is_dynamic_slice_constant_combination( - lhs, rhs, /*a_contracting_dimension=*/lhs_contracting_dimension) && - !is_dynamic_slice_constant_combination( - rhs, lhs, /*a_contracting_dimension=*/rhs_contracting_dimension)) { - VLOG(10) << "DotOfGather: Can only optimize dot(DS(ctA), ctB)) or " - "dot(ctB, DS(ctA)), where the two constants have equal " - "contracting dimensions."; - return nullptr; - } - - // LHS is DynamicSlice: - // input: dot(DS(ctA), ctB)) - // where DS(ctA) = DS({M x K}, {start, 0}, {1, K}) and ctB = {K x N}. - // => input dimensions: dot({1 x K}, {K x N}) => {1 x N}. - // output: DS(dot(ctA, ctB)) - // => output dimensions: DS ({M x N}, {start, 0}, {1, N}) => {1 x N}. - - // RHS is DynamicSlice: - // input: dot(ctA, DS(ctB)) - // where ctA = {M x K} and DS(ctB) = DS({K x N}, {0, start}, {K, 1}). - // => input dimensions: dot({M x K}, {K x 1}) => {M x 1}. - // output: DS(dot(ctA, ctB)) - // => output dimensions: DS ({M x N}, {0, start}, {M, 1}) => {M x 1}. - - bool lhs_is_dynamic_slice = lhs->opcode() == HloOpcode::kDynamicSlice; - - // ctA: - HloInstruction* left_operand = - lhs_is_dynamic_slice ? 
lhs->mutable_operand(0) : lhs; - // ctB: - HloInstruction* right_operand = - lhs_is_dynamic_slice ? rhs : rhs->mutable_operand(0); - // Build ctA x ctB. - const int m = left_operand->shape().dimensions(1 - lhs_contracting_dimension); - const int n = - right_operand->shape().dimensions(1 - rhs_contracting_dimension); - auto memoized_shape = ShapeUtil::MakeShape(F32, {m, n}); - auto* memoized_inst = computation_->AddInstruction(HloInstruction::CreateDot( - memoized_shape, left_operand, right_operand, dnums)); - // Get pair {start, 0} or {0, start}. - HloInstruction* original_start_indices = - lhs_is_dynamic_slice ? lhs->mutable_operand(1) : rhs->mutable_operand(1); - // Position of start: - int index_of_non_zero_start = lhs_is_dynamic_slice - ? 1 - lhs_contracting_dimension - : 1 - rhs_contracting_dimension; - // Position of zero: - int index_of_zero_start = 1 - index_of_non_zero_start; - - // Slice out start and 0 components and reorder if necessary. - auto indices_type = original_start_indices->shape().element_type(); - Shape s_shape = ShapeUtil::MakeShape(indices_type, {1}); - Shape d_shape = ShapeUtil::MakeShape(indices_type, {2}); - HloInstruction* non_zero_start = - computation_->AddInstruction(HloInstruction::CreateSlice( - s_shape, original_start_indices, {index_of_non_zero_start}, - {index_of_non_zero_start + 1}, {1})); - HloInstruction* zero_start = - computation_->AddInstruction(HloInstruction::CreateSlice( - s_shape, original_start_indices, {index_of_zero_start}, - {index_of_zero_start + 1}, {1})); - HloInstruction* new_start_indices = - lhs_is_dynamic_slice - ? computation_->AddInstruction(HloInstruction::CreateConcatenate( - d_shape, {non_zero_start, zero_start}, 0)) - : computation_->AddInstruction(HloInstruction::CreateConcatenate( - d_shape, {zero_start, non_zero_start}, 0)); - - // Build DynamicSlice(ctA x ctB). - const int new_slice_m = lhs_is_dynamic_slice ? 1 : m; - const int new_slice_n = lhs_is_dynamic_slice ? 
n : 1; - auto* memoized_lookup = - computation_->AddInstruction(HloInstruction::CreateDynamicSlice( - dot->shape(), memoized_inst, new_start_indices, - {new_slice_m, new_slice_n})); - - return memoized_lookup; -} - Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { auto lhs = dot->mutable_operand(0); auto rhs = dot->mutable_operand(1); @@ -1076,17 +946,6 @@ Status AlgebraicSimplifierVisitor::HandleDot(HloInstruction* dot) { return ReplaceInstruction(dot, dot_of_concat_optimized); } - // Simplify dot(ConstA, Gather(Index, ConstB)) to: - // Gather(Index, dot*(ConstA, ConstB)), where dot* is an appropriately - // batched version of dot. - TF_ASSIGN_OR_RETURN(HloInstruction * dot_of_gather_optimized, - OptimizeDotOfGather(dot)); - if (dot_of_gather_optimized) { - VLOG(10) << "Replaced dot(constA, gather(i, constB)) with " - "gather(i, dot*(constA, constB))"; - return ReplaceInstruction(dot, dot_of_gather_optimized); - } - if (enable_dot_strength_reduction_ && !is_layout_sensitive_) { TF_ASSIGN_OR_RETURN(bool did_strength_reduction, HandleDotStrengthReduction(dot)); diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index fc78420147..0f08eb3a32 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2772,208 +2772,5 @@ DotOfConcatTestSpec kDotOfConcatTestSpecs[] = { INSTANTIATE_TEST_CASE_P(DotOfConcatSimplificationTestInstantiation, DotOfConcatSimplificationTest, ::testing::ValuesIn(kDotOfConcatTestSpecs)); - -struct DotOfGatherTestSpec { - int64 m; - int64 k; - int64 n; - int s; // start index for dynamic slice on the non-contracting dimension - int64 lcd; // left contracting dimension - int64 rcd; // right contracting dimension - bool neg; // is negative testcase -}; - -class DotOfGatherSimplificationTest - : public HloVerifiedTestBase, - public ::testing::WithParamInterface {}; 
- -// input: dot(DS(ctA), ctB)) -// where DS(ctA) = DS({M x K}, {s, 0}, {1, K}) and ctB = {K x N}. -// => input dimensions: dot({1 x K}, {K x N}) => {1 x N}. -// output: DS(dot(ctA, ctB)) -// => output dimensions: DS ({M x N}, {s, 0}, {1, N}) => {1 x N}. -TEST_P(DotOfGatherSimplificationTest, ConstantRHS) { - HloComputation::Builder builder(TestName()); - - DotOfGatherTestSpec spec = GetParam(); - - ASSERT_LE(spec.s, spec.m); - - // For negative tests, increase k of the dynamic slice argument to prevent the - // optimization (constants ctA, ctB must have equal contracting dimensions). - int64 k_increase = spec.neg ? 5 : 0; - int64 lhs_rows = (spec.lcd == 0) ? (spec.k + k_increase) : spec.m; - int64 lhs_cols = (spec.lcd == 0) ? spec.m : (spec.k + k_increase); - Shape lhs_shape = ShapeUtil::MakeShape(F32, {lhs_rows, lhs_cols}); - auto* lhs = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( - /*from=*/10.0, /*to=*/10000.0, /*rows=*/lhs_rows, - /*cols=*/lhs_cols))); - - int32 start_row = (spec.lcd == 0) ? 0 : spec.s; - int32 start_col = (spec.lcd == 0) ? spec.s : 0; - const auto start_indices = - builder.AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR1({start_row, start_col}))); - int64 slice_row_size = (spec.lcd == 0) ? spec.k : 1; - int64 slice_col_size = (spec.lcd == 0) ? 1 : spec.k; - Shape ds_shape = ShapeUtil::MakeShape(F32, {slice_row_size, slice_col_size}); - auto* ds = builder.AddInstruction(HloInstruction::CreateDynamicSlice( - ds_shape, lhs, start_indices, {slice_row_size, slice_col_size})); - - int64 rhs_rows = (spec.rcd == 0) ? spec.k : spec.n; - int64 rhs_cols = (spec.rcd == 0) ? 
spec.n : spec.k; - Shape rhs_shape = ShapeUtil::MakeShape(F32, {rhs_rows, rhs_cols}); - auto* rhs = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( - /*from=*/10.0, /*to=*/10000.0, /*rows=*/rhs_rows, - /*cols=*/rhs_cols))); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(spec.lcd); - dot_dnums.add_rhs_contracting_dimensions(spec.rcd); - - int64 dot_row_size = 1; - int64 dot_col_size = spec.n; - Shape dot_shape = ShapeUtil::MakeShape(F32, {dot_row_size, dot_col_size}); - builder.AddInstruction( - HloInstruction::CreateDot(dot_shape, ds, rhs, dot_dnums)); - - auto computation = module().AddEntryComputation(builder.Build()); - AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, - non_bitcasting_callback()); - TF_ASSERT_OK_AND_ASSIGN(bool run_successful, simplifier.Run(&module())); - ASSERT_TRUE(run_successful); - EXPECT_TRUE( - ShapeUtil::Equal(computation->root_instruction()->shape(), dot_shape)); - - if (spec.neg) { - EXPECT_NE(computation->root_instruction()->opcode(), - HloOpcode::kDynamicSlice); - } else { - EXPECT_THAT(computation->root_instruction(), - op::DynamicSlice(op::Dot(op::Constant(), op::Constant()), - op::Concatenate())); - } -} - -// input: dot(ctA, DS(ctB)) -// where ctA = {M x K} and DS(ctB) = DS({K x N}, {0, s}, {K, 1}). -// => input dimensions: dot({M x K}, {K x 1}) => {M x 1}. -// output: DS(dot(ctA, ctB)) -// => output dimensions: DS ({M x N}, {0, s}, {M, 1}) => {M x 1}. -TEST_P(DotOfGatherSimplificationTest, ConstantLHS) { - HloComputation::Builder builder(TestName()); - - DotOfGatherTestSpec spec = GetParam(); - - ASSERT_LE(spec.s, spec.n); - - int64 lhs_rows = (spec.lcd == 0) ? spec.k : spec.m; - int64 lhs_cols = (spec.lcd == 0) ? 
spec.m : spec.k; - Shape lhs_shape = ShapeUtil::MakeShape(F32, {lhs_rows, lhs_cols}); - auto* lhs = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( - /*from=*/10.0, /*to=*/10000.0, /*rows=*/lhs_rows, - /*cols=*/lhs_cols))); - - // For negative tests increase k of the dynamic slice argument to prevent the - // optimization - int64 k_increase = spec.neg ? 5 : 0; - int64 rhs_rows = (spec.rcd == 0) ? (spec.k + k_increase) : spec.n; - int64 rhs_cols = (spec.rcd == 0) ? spec.n : (spec.k + k_increase); - Shape rhs_shape = ShapeUtil::MakeShape(F32, {rhs_rows, rhs_cols}); - auto* rhs = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR2F32Linspace( - /*from=*/10.0, /*to=*/10000.0, /*rows=*/rhs_rows, - /*cols=*/rhs_cols))); - - int32 start_row = (spec.rcd == 0) ? 0 : spec.s; - int32 start_col = (spec.rcd == 0) ? spec.s : 0; - const auto start_indices = - builder.AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR1({start_row, start_col}))); - int64 slice_row_size = (spec.rcd == 0) ? spec.k : 1; - int64 slice_col_size = (spec.rcd == 0) ? 
1 : spec.k; - Shape ds_shape = ShapeUtil::MakeShape(F32, {slice_row_size, slice_col_size}); - auto* ds = builder.AddInstruction(HloInstruction::CreateDynamicSlice( - ds_shape, rhs, start_indices, {slice_row_size, slice_col_size})); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(spec.lcd); - dot_dnums.add_rhs_contracting_dimensions(spec.rcd); - - int64 dot_row_size = spec.m; - int64 dot_col_size = 1; - Shape dot_shape = ShapeUtil::MakeShape(F32, {dot_row_size, dot_col_size}); - builder.AddInstruction( - HloInstruction::CreateDot(dot_shape, lhs, ds, dot_dnums)); - - auto computation = module().AddEntryComputation(builder.Build()); - AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, - non_bitcasting_callback()); - TF_ASSERT_OK_AND_ASSIGN(bool run_successful, simplifier.Run(&module())); - ASSERT_TRUE(run_successful); - EXPECT_TRUE( - ShapeUtil::Equal(computation->root_instruction()->shape(), dot_shape)); - - if (spec.neg) { - EXPECT_NE(computation->root_instruction()->opcode(), - HloOpcode::kDynamicSlice); - } else { - EXPECT_THAT(computation->root_instruction(), - op::DynamicSlice(op::Dot(op::Constant(), op::Constant()), - op::Concatenate())); - } -} - -std::vector DotOfGatherPositiveNegativeTests() { - std::vector positives = { - // "Classical dot", i.e. matrix multiply: - {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/1, /*rcd=*/0, - /*neg=*/false}, - {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/1, /*rcd=*/0, - /*neg=*/false}, - {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/1, /*rcd=*/0, - /*neg=*/false}, - // Note: testing for m=1 and n=1 is unnecessary, as this optimizes to - // dot(ct, ct) before DotOfGather optimization kicks in. 
- // Contract on rows: - {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/0, /*rcd=*/0, - /*neg=*/false}, - {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/0, /*rcd=*/0, - /*neg=*/false}, - {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/0, /*rcd=*/0, - /*neg=*/false}, - // Reverse matrix multiply: - {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/0, /*rcd=*/1, - /*neg=*/false}, - {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/0, /*rcd=*/1, - /*neg=*/false}, - {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/0, /*rcd=*/1, - /*neg=*/false}, - // Contract on columns: - {/*m=*/10, /*k=*/10, /*n=*/5, /*s=*/0, /*lcd=*/1, /*rcd=*/1, - /*neg=*/false}, - {/*m=*/20, /*k=*/20, /*n=*/3, /*s=*/2, /*lcd=*/1, /*rcd=*/1, - /*neg=*/false}, - {/*m=*/10, /*k=*/3, /*n=*/10, /*s=*/9, /*lcd=*/1, /*rcd=*/1, - /*neg=*/false}, - }; - std::vector all; - for (int i = 0; i < positives.size(); i++) { - DotOfGatherTestSpec positive_test = positives[i]; - all.push_back(positive_test); - DotOfGatherTestSpec negative_test = positive_test; - negative_test.neg = true; - all.push_back(negative_test); - } - return all; -} - -INSTANTIATE_TEST_CASE_P( - DotOfGatherSimplificationTestInstantiation, DotOfGatherSimplificationTest, - ::testing::ValuesIn(DotOfGatherPositiveNegativeTests())); - } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 63354d4b30..6b0c04c2c0 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -703,251 +703,5 @@ TEST_F(DotOperationTest, DotOfConcatOptimizationWithConstRHS) { &builder, expected, {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, error_spec_); } - -TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstRHSClassicMM) { - std::unique_ptr> constant_lhs_array(new Array2D( - {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - std::unique_ptr> constant_rhs_array( - new 
Array2D({{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}, - {9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0}, - {3.0, 2.0, 1.0}})); - // Dot result to slice from: {{114, 105, 96}, {96, 105, 114}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({1, 0}); - auto dynamic_slice = - builder.DynamicSlice(lhs_constant, start_constant, {1, 6}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(1); - dot_dnums.add_rhs_contracting_dimensions(0); - auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); - - Array2D expected({{96.0, 105.0, 114.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} - -TEST_F(DotOperationTest, DotOfGatherOptimizationWithConstLHSClassicMM) { - std::unique_ptr> constant_lhs_array(new Array2D( - {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - std::unique_ptr> constant_rhs_array( - new Array2D({{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}, - {9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0}, - {3.0, 2.0, 1.0}})); - // Dot result to slice from: {{114, 105, 96}, {96, 105, 114}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({0, 1}); - auto dynamic_slice = - builder.DynamicSlice(rhs_constant, start_constant, {6, 1}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(1); - dot_dnums.add_rhs_contracting_dimensions(0); - auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); - - Array2D expected({{105.0}, {105.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} - -// TODO (b/69062148) Enable when Dot implements general contracting 
dimensions. -TEST_F(DotOperationTest, - DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( - DotOfGatherOptimizationWithConstRHSReverseMM)))) { - std::unique_ptr> constant_lhs_array( - new Array2D({{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}, - {9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0}, - {3.0, 2.0, 1.0}})); - std::unique_ptr> constant_rhs_array(new Array2D( - {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - // Dot result to slice from: {{114, 96}, {105, 105}, {96, 114}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({0, 1}); - auto dynamic_slice = - builder.DynamicSlice(lhs_constant, start_constant, {6, 1}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(0); - dot_dnums.add_rhs_contracting_dimensions(1); - auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); - - Array2D expected({{105.0, 105.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} - -// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
-TEST_F(DotOperationTest, - DISABLED_ON_CPU(DISABLED_ON_GPU(DISABLED_ON_INTERPRETER( - DotOfGatherOptimizationWithConstLHSReverseMM)))) { - std::unique_ptr> constant_lhs_array( - new Array2D({{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}, - {9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0}, - {3.0, 2.0, 1.0}})); - std::unique_ptr> constant_rhs_array(new Array2D( - {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - // Dot result to slice from: {{114, 96}, {105, 105}, {96, 114}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({1, 0}); - auto dynamic_slice = - builder.DynamicSlice(rhs_constant, start_constant, {1, 6}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(0); - dot_dnums.add_rhs_contracting_dimensions(1); - auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); - - Array2D expected({{96.0}, {105.0}, {114.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} - -// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
-TEST_F(DotOperationTest, - DISABLED_ON_CPU(DISABLED_ON_GPU( - DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSRows)))) { - std::unique_ptr> constant_lhs_array( - new Array2D({{1.0, 2.0}, - {3.0, 4.0}, - {5.0, 6.0}, - {6.0, 5.0}, - {4.0, 3.0}, - {2.0, 1.0}})); - std::unique_ptr> constant_rhs_array( - new Array2D({{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}, - {9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0}, - {3.0, 2.0, 1.0}})); - // Dot result to slice from: {{132, 129, 126}, {126, 129, 132}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({0, 1}); - auto dynamic_slice = - builder.DynamicSlice(lhs_constant, start_constant, {6, 1}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(0); - dot_dnums.add_rhs_contracting_dimensions(0); - auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); - - Array2D expected({{126.0, 129.0, 132.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} - -// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
-TEST_F(DotOperationTest, - DISABLED_ON_CPU(DISABLED_ON_GPU( - DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSRows)))) { - std::unique_ptr> constant_lhs_array( - new Array2D({{1.0, 2.0}, - {3.0, 4.0}, - {5.0, 6.0}, - {6.0, 5.0}, - {4.0, 3.0}, - {2.0, 1.0}})); - std::unique_ptr> constant_rhs_array( - new Array2D({{1.0, 2.0, 3.0}, - {4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0}, - {9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0}, - {3.0, 2.0, 1.0}})); - // Dot result to slice from: {{132, 129, 126}, {126, 129, 132}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({0, 1}); - auto dynamic_slice = - builder.DynamicSlice(rhs_constant, start_constant, {6, 1}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(0); - dot_dnums.add_rhs_contracting_dimensions(0); - auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); - - Array2D expected({{129.0}, {129.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} - -// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
-TEST_F(DotOperationTest, - DISABLED_ON_CPU(DISABLED_ON_GPU( - DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstRHSCols)))) { - std::unique_ptr> constant_lhs_array(new Array2D( - {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - std::unique_ptr> constant_rhs_array( - new Array2D({{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0, 9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - // Dot result to slice from: {{91, 168, 56}, {56, 168, 91}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({1, 0}); - auto dynamic_slice = - builder.DynamicSlice(lhs_constant, start_constant, {1, 6}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(1); - dot_dnums.add_rhs_contracting_dimensions(1); - auto result = builder.DotGeneral(dynamic_slice, rhs_constant, dot_dnums); - - Array2D expected({{56.0, 168.0, 91.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} - -// TODO (b/69062148) Enable when Dot implements general contracting dimensions. 
-TEST_F(DotOperationTest, - DISABLED_ON_CPU(DISABLED_ON_GPU( - DISABLED_ON_INTERPRETER(DotOfGatherOptimizationWithConstLHSCols)))) { - std::unique_ptr> constant_lhs_array(new Array2D( - {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - std::unique_ptr> constant_rhs_array( - new Array2D({{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, - {7.0, 8.0, 9.0, 9.0, 8.0, 7.0}, - {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); - // Dot result to slice from: {{91, 168, 56}, {56, 168, 91}} - - ComputationBuilder builder(client_, TestName()); - auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); - auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto start_constant = builder.ConstantR1({1, 0}); - auto dynamic_slice = - builder.DynamicSlice(rhs_constant, start_constant, {1, 6}); - - DotDimensionNumbers dot_dnums; - dot_dnums.add_lhs_contracting_dimensions(1); - dot_dnums.add_rhs_contracting_dimensions(1); - auto result = builder.DotGeneral(lhs_constant, dynamic_slice, dot_dnums); - - Array2D expected({{168.0}, {168.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); -} } // namespace } // namespace xla -- GitLab From 203caffbee9470109e3f750ba847e0aa4894a1e6 Mon Sep 17 00:00:00 2001 From: Rajendra arora Date: Fri, 16 Feb 2018 10:56:25 +0530 Subject: [PATCH 0100/2939] Documentation api reference badge added in Readme.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 916e5200b2..efacf063e3 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,10 @@ ----------------- -| **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** | **`Android`** | -|-----------------|---------------------|------------------|-------------------|---------------| -| [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-cpu)](https://ci.tensorflow.org/job/tensorflow-master-cpu) | [![Build 
Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-linux-gpu)](https://ci.tensorflow.org/job/tensorflow-master-linux-gpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-mac)](https://ci.tensorflow.org/job/tensorflow-master-mac) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) [ ![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg) ](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) | + +| **`Documentation`** | **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** | **`Android`** | +|-----------------|---------------------|------------------|-------------------|---------------|---------------| +| [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://www.tensorflow.org/api_docs/) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-cpu)](https://ci.tensorflow.org/job/tensorflow-master-cpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-linux-gpu)](https://ci.tensorflow.org/job/tensorflow-master-linux-gpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-mac)](https://ci.tensorflow.org/job/tensorflow-master-mac) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) [ ![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg) 
](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) **TensorFlow** is an open source software library for numerical computation using data flow graphs. The graph nodes represent mathematical operations, while -- GitLab From 4e7772e0c74a663809f9fcf39545032eb8277e6a Mon Sep 17 00:00:00 2001 From: Rajendra arora Date: Fri, 16 Feb 2018 11:48:10 +0530 Subject: [PATCH 0101/2939] Added a contribution guideline header in readme.md --- README.md | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index efacf063e3..ef5bdc66ef 100644 --- a/README.md +++ b/README.md @@ -22,20 +22,6 @@ organization for the purposes of conducting machine learning and deep neural networks research. The system is general enough to be applicable in a wide variety of other domains, as well. -**If you want to contribute to TensorFlow, be sure to review the [contribution -guidelines](CONTRIBUTING.md). This project adheres to TensorFlow's -[code of conduct](CODE_OF_CONDUCT.md). By participating, you are expected to -uphold this code.** - -**We use [GitHub issues](https://github.com/tensorflow/tensorflow/issues) for -tracking requests and bugs. 
So please see -[TensorFlow Discuss](https://groups.google.com/a/tensorflow.org/forum/#!forum/discuss) for general questions -and discussion, and please direct specific questions to [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow).** - -The TensorFlow project strives to abide by generally accepted best practices in open-source software development: - -[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/1486/badge)](https://bestpractices.coreinfrastructure.org/projects/1486) - ## Installation *See [Installing TensorFlow](https://www.tensorflow.org/get_started/os_setup.html) for instructions on how to install our release binaries or how to build from source.* @@ -76,6 +62,22 @@ $ python >>> sess.close() ``` +## Contribution guidelines + +**If you want to contribute to TensorFlow, be sure to review the [contribution +guidelines](CONTRIBUTING.md). This project adheres to TensorFlow's +[code of conduct](CODE_OF_CONDUCT.md). By participating, you are expected to +uphold this code.** + +**We use [GitHub issues](https://github.com/tensorflow/tensorflow/issues) for +tracking requests and bugs. So please see +[TensorFlow Discuss](https://groups.google.com/a/tensorflow.org/forum/#!forum/discuss) for general questions +and discussion, and please direct specific questions to [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow).** + +The TensorFlow project strives to abide by generally accepted best practices in open-source software development: + +[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/1486/badge)](https://bestpractices.coreinfrastructure.org/projects/1486) + ## For more information * [TensorFlow Website](https://www.tensorflow.org) -- GitLab From f08155b7256e59f265a38d30de21ed2ced9d5ffa Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Thu, 15 Feb 2018 22:21:02 -0800 Subject: [PATCH 0102/2939] Make the default values for experimental and non experimental apis match. 
PiperOrigin-RevId: 185952648 --- .../contrib/quantize/python/quantize_graph.py | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py index 0dfe78fd02..5a3a74cec4 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph.py +++ b/tensorflow/contrib/quantize/python/quantize_graph.py @@ -69,7 +69,7 @@ def _create_graph(input_graph=None, activation_bits=activation_bits) -def create_training_graph(input_graph=None, quant_delay=250000): +def create_training_graph(input_graph=None, quant_delay=0): """Rewrites a training input_graph in place for simulated quantization. The graph has fake quantization ops inserted to simulate the error @@ -77,6 +77,14 @@ def create_training_graph(input_graph=None, quant_delay=250000): the expected behavior of previously held references to nodes and tensors may change. + The default value of quant_delay is suitable for finetuning an already trained + floating point model (recommended). + If one wants to train a quantized model from scratch, quant_delay should be + set to the number of steps it take the floating point model to converge. + Quantization will be activated at this point and effectively finetune the + model. If quant_delay is not provided when training from scratch, training can + often fail. + Args: input_graph: The tf.Graph to be transformed. quant_delay: Number of steps after which weights and activations are @@ -93,12 +101,12 @@ def create_training_graph(input_graph=None, quant_delay=250000): # Corresponds to case of restoring from a floating point checkpoint # In this case, we can freeze the moving mean and variance early on and # switch to using them during training. Therefore, freeze_bn_delay is set to - # 200000 - freeze_bn_delay = 200000 + # 2e5. + freeze_bn_delay = int(2e5) else: # If training from scratch, set freeze_bn_delay to 100 epochs after quant # delay. 
With a batch size of 64, this corresponds to 20000*100=2M steps. - freeze_bn_delay = quant_delay + 2000000 + freeze_bn_delay = quant_delay + int(2e6) _create_graph( input_graph=input_graph, @@ -129,8 +137,8 @@ def create_eval_graph(input_graph=None): def experimental_create_training_graph(input_graph=None, weight_bits=8, activation_bits=8, - quant_delay=250000, - freeze_bn_delay=500000): + quant_delay=0, + freeze_bn_delay=int(2e5)): """Rewrites a training input_graph in place for simulated quantization. This function has additional experimental options not (yet) available to @@ -141,6 +149,14 @@ def experimental_create_training_graph(input_graph=None, the expected behavior of previously held references to nodes and tensors may change. + The default value of quant_delay is suitable for finetuning an already trained + floating point model (recommended). + If one wants to train a quantized model from scratch, quant_delay should be + set to the number of steps it take the floating point model to converge. + Quantization will be activated at this point and effectively finetune the + model. If quant_delay is not provided when training from scratch, training can + often fail. + Args: input_graph: The tf.Graph to be transformed,if None then defaults to the default graph. 
-- GitLab From d536c5de09276ae935981a498c6ac46006646809 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Thu, 15 Feb 2018 23:44:47 -0800 Subject: [PATCH 0103/2939] Code generator for builtin_ops.h, and a test to ensure its consistency PiperOrigin-RevId: 185957720 --- tensorflow/contrib/lite/builtin_ops.h | 80 +++++++++++ .../lite/schema/builtin_ops_header/BUILD | 43 ++++++ .../lite/schema/builtin_ops_header/README.md | 12 ++ .../builtin_ops_header/consistency_test.cc | 47 +++++++ .../schema/builtin_ops_header/generate.cc | 25 ++++ .../schema/builtin_ops_header/generator.cc | 132 ++++++++++++++++++ .../schema/builtin_ops_header/generator.h | 38 +++++ .../builtin_ops_header/generator_test.cc | 63 +++++++++ 8 files changed, 440 insertions(+) create mode 100644 tensorflow/contrib/lite/builtin_ops.h create mode 100644 tensorflow/contrib/lite/schema/builtin_ops_header/BUILD create mode 100644 tensorflow/contrib/lite/schema/builtin_ops_header/README.md create mode 100644 tensorflow/contrib/lite/schema/builtin_ops_header/consistency_test.cc create mode 100644 tensorflow/contrib/lite/schema/builtin_ops_header/generate.cc create mode 100644 tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc create mode 100644 tensorflow/contrib/lite/schema/builtin_ops_header/generator.h create mode 100644 tensorflow/contrib/lite/schema/builtin_ops_header/generator_test.cc diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h new file mode 100644 index 0000000000..4ebd1586de --- /dev/null +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -0,0 +1,80 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_BUILTIN_OPS_H_ +#define TENSORFLOW_CONTRIB_LITE_BUILTIN_OPS_H_ + +// DO NOT EDIT MANUALLY: This file is automatically generated by +// `schema_builtin_ops_header_generator.py`. + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +typedef enum { + kTfLiteBuiltinAdd = 0, + kTfLiteBuiltinAveragePool2d = 1, + kTfLiteBuiltinConcatenation = 2, + kTfLiteBuiltinConv2d = 3, + kTfLiteBuiltinDepthwiseConv2d = 4, + kTfLiteBuiltinEmbeddingLookup = 7, + kTfLiteBuiltinFullyConnected = 9, + kTfLiteBuiltinHashtableLookup = 10, + kTfLiteBuiltinL2Normalization = 11, + kTfLiteBuiltinL2Pool2d = 12, + kTfLiteBuiltinLocalResponseNormalization = 13, + kTfLiteBuiltinLogistic = 14, + kTfLiteBuiltinLshProjection = 15, + kTfLiteBuiltinLstm = 16, + kTfLiteBuiltinMaxPool2d = 17, + kTfLiteBuiltinMul = 18, + kTfLiteBuiltinRelu = 19, + kTfLiteBuiltinReluN1To1 = 20, + kTfLiteBuiltinRelu6 = 21, + kTfLiteBuiltinReshape = 22, + kTfLiteBuiltinResizeBilinear = 23, + kTfLiteBuiltinRnn = 24, + kTfLiteBuiltinSoftmax = 25, + kTfLiteBuiltinSpaceToDepth = 26, + kTfLiteBuiltinSvdf = 27, + kTfLiteBuiltinTanh = 28, + kTfLiteBuiltinConcatEmbeddings = 29, + kTfLiteBuiltinSkipGram = 30, + kTfLiteBuiltinCall = 31, + kTfLiteBuiltinCustom = 32, + kTfLiteBuiltinEmbeddingLookupSparse = 33, + kTfLiteBuiltinPad = 34, + kTfLiteBuiltinUnidirectionalSequenceRnn = 35, + kTfLiteBuiltinGather = 36, + kTfLiteBuiltinBatchToSpaceNd = 37, + kTfLiteBuiltinSpaceToBatchNd = 38, + kTfLiteBuiltinTranspose = 39, + 
kTfLiteBuiltinMean = 40, + kTfLiteBuiltinSub = 41, + kTfLiteBuiltinDiv = 42, + kTfLiteBuiltinSqueeze = 43, + kTfLiteBuiltinUnidirectionalSequenceLstm = 44, + kTfLiteBuiltinStridedSlice = 45, + kTfLiteBuiltinBidirectionalSequenceRnn = 46, + kTfLiteBuiltinExp = 47, + kTfLiteBuiltinTopkV2 = 48, + kTfLiteBuiltinSplit = 49, +} TfLiteBuiltinOperator; + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus +#endif // TENSORFLOW_CONTRIB_LITE_BUILTIN_OPS_H_ +} diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/BUILD b/tensorflow/contrib/lite/schema/builtin_ops_header/BUILD new file mode 100644 index 0000000000..0148149a6a --- /dev/null +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/BUILD @@ -0,0 +1,43 @@ +package(default_visibility = [ + "//visibility:public", +]) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "generator", + srcs = ["generator.cc"], + hdrs = ["generator.h"], + deps = [ + "//tensorflow/contrib/lite/schema:schema_fbs", + ], +) + +cc_binary( + name = "generate", + srcs = ["generate.cc"], + deps = [ + ":generator", + ], +) + +cc_test( + name = "generator_test", + srcs = ["generator_test.cc"], + deps = [ + ":generator", + "@com_google_googletest//:gtest", + ], +) + +cc_test( + name = "consistency_test", + srcs = ["consistency_test.cc"], + data = [ + "//tensorflow/contrib/lite:builtin_ops.h", + ], + deps = [ + ":generator", + "@com_google_googletest//:gtest", + ], +) diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/README.md b/tensorflow/contrib/lite/schema/builtin_ops_header/README.md new file mode 100644 index 0000000000..f20d4f664e --- /dev/null +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/README.md @@ -0,0 +1,12 @@ +# Builtin Ops Header Generator. + +This directory contains a code generator to generate a pure C header for +builtin op definition. 
+ +Whenever you add a new builtin op, please execute: + +```sh +bazel run \ + //tensorflow/contrib/lite/schema/builtin_ops_header:generate > \ + tensorflow/contrib/lite/builtin_ops.h +``` diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/consistency_test.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/consistency_test.cc new file mode 100644 index 0000000000..d55c125c11 --- /dev/null +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/consistency_test.cc @@ -0,0 +1,47 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include "tensorflow/contrib/lite/schema/builtin_ops_header/generator.h" + +namespace { + +const char* kHeaderFileName = + "tensorflow/contrib/lite/builtin_ops.h"; + +// The test ensures that `builtin_ops.h` is consistent with the FlatBuffer +// schema definition. When the schema is modified, it's required to run the +// generator to re-generate the header. +// Please see README.md for more details. 
+TEST(BuiltinOpsHeaderTest, TestConsistency) { + std::ifstream input_stream(kHeaderFileName, std::ios::binary); + ASSERT_TRUE(input_stream); + std::string file_content((std::istreambuf_iterator(input_stream)), + std::istreambuf_iterator()); + + std::ostringstream output_stream; + tflite::builtin_ops_header::GenerateHeader(output_stream); + std::string generated_content = output_stream.str(); + + EXPECT_EQ(file_content, generated_content); +} + +} // anonymous namespace + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generate.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/generate.cc new file mode 100644 index 0000000000..72a28987b8 --- /dev/null +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generate.cc @@ -0,0 +1,25 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include "tensorflow/contrib/lite/schema/builtin_ops_header/generator.h" + +// This executable is used to generate builtin_ops.h in TensorFlow Lite. +// Please see README.md for more details. 
+int main() { + if (!tflite::builtin_ops_header::GenerateHeader(std::cout)) { + std::cerr << "Failed to generate the header file.\n"; + } + return 0; +} diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc new file mode 100644 index 0000000000..b983d59d85 --- /dev/null +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc @@ -0,0 +1,132 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/schema/builtin_ops_header/generator.h" +#include "tensorflow/contrib/lite/schema/schema_generated.h" + +namespace tflite { +namespace builtin_ops_header { + +namespace { +const char* kFileHeader = + R"(/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_BUILTIN_OPS_H_ +#define TENSORFLOW_CONTRIB_LITE_BUILTIN_OPS_H_ + +// DO NOT EDIT MANUALLY: This file is automatically generated by +// `schema_builtin_ops_header_generator.py`. + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +typedef enum { +)";
+
+// Closes the enum and extern "C" block that kFileHeader opens, then the guard.
+const char* kFileFooter =
+    R"(} TfLiteBuiltinOperator;
+
+#ifdef __cplusplus
+} // extern "C"
+#endif // __cplusplus
+#endif // TENSORFLOW_CONTRIB_LITE_BUILTIN_OPS_H_
+)";
+} // anonymous namespace
+
+bool IsValidInputEnumName(const std::string& name) {
+  // Valid names are non-empty words of upper-case letters and digits joined by
+  // single underscores, e.g. "ADD", "CONV_2D", "L2_POOL_2D".
+  // <cctype> predicates are undefined for negative char values: go via unsigned.
+  const auto is_word_char = [](char c) {
+    const auto uc = static_cast<unsigned char>(c);
+    return isupper(uc) || isdigit(uc);
+  };
+
+  // An empty name would otherwise pass and yield a bare "kTfLiteBuiltin".
+  if (name.empty()) return false;
+
+  const char* begin = name.c_str();
+  const char* ch = begin;
+  while (*ch != '\0') {
+    // Every word after the first must be introduced by exactly one underscore.
+    if (ch != begin) {
+      if (*ch != '_') return false;
+      ++ch;
+    }
+    // Consume the word; it has to contain at least one character.
+    const char* word_begin = ch;
+    while (is_word_char(*ch)) ++ch;
+    if (ch == word_begin) return false;
+  }
+  return true;
+}
+
+std::string ConstantizeVariableName(const std::string& name) {
+  std::string result = "kTfLiteBuiltin";
+  // The first letter of every '_'-separated word is upper-cased; the rest of
+  // the word is lower-cased and the underscores themselves are dropped.
+  bool uppercase = true;
+  for (char input_char : name) {
+    if (input_char == '_') {
+      uppercase = true;
+      continue;
+    }
+    // Cast through unsigned char: <cctype> calls are UB for negative chars.
+    const auto uc = static_cast<unsigned char>(input_char);
+    result += static_cast<char>(uppercase ? toupper(uc) : tolower(uc));
+    uppercase = false;
+  }
+  return result;
+}
+
+bool GenerateHeader(std::ostream& os) {
+  auto enum_names = tflite::EnumNamesBuiltinOperator();
+
+  // Validate every name before emitting anything, so that an invalid name
+  // never leaves a partially written header on the stream.
+  for (auto enum_value : EnumValuesBuiltinOperator()) {
+    auto enum_name = enum_names[enum_value];
+    if (!IsValidInputEnumName(enum_name)) {
+      std::cerr << "Invalid input enum name: " << enum_name << std::endl;
+      return false;
+    }
+  }
+
+  os << kFileHeader;
+  for (auto enum_value : EnumValuesBuiltinOperator()) {
+    // One enumerator per line: `<constant name> = <numeric value>,`.
+    os << " " << ConstantizeVariableName(enum_names[enum_value]) << " = "
+       << enum_value << ",\n";
+  }
+  os << kFileFooter;
+  return true;
+}
+
+} // namespace builtin_ops_header
+} // namespace tflite
diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.h b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.h new file mode 100644 index 0000000000..3241ff83d5 --- /dev/null +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.h @@ -0,0 +1,38 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// A utility library to generate a pure C header for builtin ops definition. +#ifndef TENSORFLOW_CONTRIB_LITE_SCHEMA_BUILTIN_OPS_HEADER_GENERATOR_H_ +#define TENSORFLOW_CONTRIB_LITE_SCHEMA_BUILTIN_OPS_HEADER_GENERATOR_H_ + +#include <iostream> + +namespace tflite { +namespace builtin_ops_header { + +// Check if the input enum name (from the Flatbuffer definition) is valid.
+bool IsValidInputEnumName(const std::string& name); + +// Convert the enum name from Flatbuffer convention to C enum name convention. +// E.g. `L2_POOL_2D` becomes `kTfLiteBuiltinL2Pool2d`. +std::string ConstantizeVariableName(const std::string& name); + +// Generates a pure C header for the builtin ops definition and writes it to +// the output stream. Returns false if any schema enum name is invalid. +bool GenerateHeader(std::ostream& os); + +} // namespace builtin_ops_header +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_SCHEMA_BUILTIN_OPS_HEADER_GENERATOR_H_ diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generator_test.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/generator_test.cc new file mode 100644 index 0000000000..a7dc8e1b04 --- /dev/null +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generator_test.cc @@ -0,0 +1,63 @@ + +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#include "tensorflow/contrib/lite/schema/builtin_ops_header/generator.h" +#include +#include
+
+namespace {
+using tflite::builtin_ops_header::ConstantizeVariableName;
+using tflite::builtin_ops_header::IsValidInputEnumName;
+
+TEST(TestIsValidInputEnumName, TestWithValidInputNames) {
+  EXPECT_TRUE(IsValidInputEnumName("ADD"));
+  EXPECT_TRUE(IsValidInputEnumName("CONV_2D"));
+  EXPECT_TRUE(IsValidInputEnumName("L2_POOL_2D"));
+}
+
+TEST(TestIsValidInputEnumName, TestWithLeadingUnderscore) {
+  EXPECT_FALSE(IsValidInputEnumName("_ADD"));
+  EXPECT_FALSE(IsValidInputEnumName("_CONV_2D"));
+}
+
+// No leading underscore here: only the lower-case letters can cause rejection.
+TEST(TestIsValidInputEnumName, TestWithLowerCase) {
+  EXPECT_FALSE(IsValidInputEnumName("AdD"));
+  EXPECT_FALSE(IsValidInputEnumName("COnV_2D"));
+}
+
+// Otherwise valid words: only the '!' / '?' separator can cause rejection.
+TEST(TestIsValidInputEnumName, TestWithOtherCharacters) {
+  EXPECT_FALSE(IsValidInputEnumName("ADD!2D"));
+  EXPECT_FALSE(IsValidInputEnumName("CONV?2D"));
+}
+
+TEST(TestIsValidInputEnumName, TestWithDoubleUnderscores) {
+  EXPECT_FALSE(IsValidInputEnumName("ADD__2D"));
+  EXPECT_FALSE(IsValidInputEnumName("CONV__2D"));
+}
+
+TEST(TestConstantizeVariableName, TestWithValidInputNames) {
+  EXPECT_EQ(ConstantizeVariableName("ADD"), "kTfLiteBuiltinAdd");
+  EXPECT_EQ(ConstantizeVariableName("CONV_2D"), "kTfLiteBuiltinConv2d");
+  EXPECT_EQ(ConstantizeVariableName("L2_POOL_2D"), "kTfLiteBuiltinL2Pool2d");
+}
+} // anonymous namespace
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
-- GitLab From ecfcf07a418a526e1a6cb2d9c8d6a5bd9d46d430 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 16 Feb 2018 01:53:59 -0800 Subject: [PATCH 0104/2939] Remove a possible ambiguity in the `py_func` documentation.
PiperOrigin-RevId: 185968663 --- tensorflow/python/ops/script_ops.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index 61e14adf4b..0ba29cbf32 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -267,7 +267,7 @@ def py_func(func, inp, Tout, stateful=True, name=None): """Wraps a python function and uses it as a TensorFlow op. Given a python function `func`, which takes numpy arrays as its - inputs and returns numpy arrays as its outputs, wrap this function as an + arguments and returns numpy arrays as its outputs, wrap this function as an operation in a TensorFlow graph. The following snippet constructs a simple TensorFlow graph that invokes the `np.sinh()` NumPy function as a operation in the graph: @@ -276,8 +276,8 @@ def py_func(func, inp, Tout, stateful=True, name=None): def my_func(x): # x will be a numpy array with the contents of the placeholder below return np.sinh(x) - inp = tf.placeholder(tf.float32) - y = tf.py_func(my_func, [inp], tf.float32) + input = tf.placeholder(tf.float32) + y = tf.py_func(my_func, [input], tf.float32) ``` **N.B.** The `tf.py_func()` operation has the following known limitations: @@ -293,10 +293,12 @@ def py_func(func, inp, Tout, stateful=True, name=None): server (e.g. using `with tf.device():`). Args: - func: A Python function, which accepts a list of NumPy `ndarray` objects - having element types that match the corresponding `tf.Tensor` objects - in `inp`, and returns a list of `ndarray` objects (or a single `ndarray`) - having element types that match the corresponding values in `Tout`. + func: A Python function, which accepts `ndarray` objects as arguments and + returns a list of `ndarray` objects (or a single `ndarray`). 
This function + must accept as many arguments as there are tensors in `inp`, and these + argument types will match the corresponding `tf.Tensor` objects + in `inp`. The returned `ndarray`s must match the number and types defined + in `Tout`. Important Note: Input and output numpy `ndarray`s of `func` are not guaranteed to be copies. In some cases their underlying memory will be shared with the corresponding TensorFlow tensors. -- GitLab From f4d95b4abc45645ff5ed1670abc73fe0ffe49a82 Mon Sep 17 00:00:00 2001 From: kdavis-mozilla Date: Fri, 16 Feb 2018 11:41:19 +0100 Subject: [PATCH 0105/2939] Added Deep Speech use --- tensorflow/docs_src/about/uses.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/docs_src/about/uses.md b/tensorflow/docs_src/about/uses.md index 8818177a28..d646880bd3 100644 --- a/tensorflow/docs_src/about/uses.md +++ b/tensorflow/docs_src/about/uses.md @@ -22,6 +22,14 @@ This section describes some of the current uses of the TensorFlow system. > TensorFlow, or even better, send us a pull request to add an entry to this > file. +* **Deep Speech** +
    +
  • **Organization**: Mozilla
  • +
  • **Domain**: Speech Recognition
  • +
  • **Description**: A TensorFlow implementation motivated by Baidu's Deep Speech architecture.
  • +
  • **More info**: [GitHub Repo](https://github.com/mozilla/deepspeech)
  • +
+ * **RankBrain**