diff --git a/.bazelrc b/.bazelrc index d5d20309df82498a552df759e3d200a914a4cfb7..1945078789dcd48603ceb322c34ab2cd5af5eb59 100644 --- a/.bazelrc +++ b/.bazelrc @@ -66,6 +66,13 @@ build:gdr --define=with_gdr_support=true build:ngraph --define=with_ngraph_support=true build:verbs --define=with_verbs_support=true +# Options to disable default on features +build:noaws --define=no_aws_support=true +build:nogcp --define=no_gcp_support=true +build:nohdfs --define=no_hdfs_support=true +build:nokafka --define=no_kafka_support=true +build:noignite --define=no_ignite_support=true + build --define=use_fast_cpp_protos=true build --define=allow_oversize_protos=true build --define=grpc_no_ares=true diff --git a/README.md b/README.md index 57efb876c9afaf9fe76c4ced4e6a1572e9241edf..c3455474260b2db56f1f585b70af9c259704d01a 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,21 @@ subscribing to [announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). ## Installation -*See [Installing TensorFlow](https://www.tensorflow.org/install) for instructions on how to install our release binaries or how to build from source.* + +To install the current release for CPU-only: + +``` +pip install tensorflow +``` + +Use the GPU package for CUDA-enabled GPU cards: + +``` +pip install tensorflow-gpu +``` + +*See [Installing TensorFlow](https://www.tensorflow.org/install) for detailed +instructions, and how to build from source.* People who are a little more adventurous can also try our nightly binaries: @@ -93,14 +107,14 @@ The TensorFlow project strives to abide by generally accepted best practices in ### Community Supported Builds -| Build Type | Status | Artifacts | -| --- | --- | --- | -| **IBM s390x** | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/badge/icon)](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | TBA | -| **IBM ppc64le CPU** | [![Build 
Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/) | TBA | -| **IBM ppc64le GPU** | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/) | TBA | -| **Linux CPU with Intel® MKL-DNN** Nightly | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/) | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/) | -| **Linux CPU with Intel® MKL-DNN** Python 2.7
**Linux CPU with Intel® MKL-DNN** Python 3.5
**Linux CPU with Intel® MKL-DNN** Python 3.6 | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild)|[1.10.0 py2.7](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp27-cp27mu-linux_x86_64.whl)
[1.10.0 py3.5](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp35-cp35m-linux_x86_64.whl)
[1.10.0 py3.6](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp36-cp36m-linux_x86_64.whl) | - +Build Type | Status | Artifacts +---------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- +**IBM s390x** | [![Build Status](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/badge/icon)](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | TBA +**IBM ppc64le CPU** | [![Build Status](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/badge/icon)](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/) | TBA +**IBM ppc64le GPU** Nightly | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/) +**IBM ppc64le GPU** Stable Release | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) | [Release](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) +**Linux CPU with Intel® MKL-DNN** Nightly | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/) | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/) +**Linux CPU with Intel® MKL-DNN** Python 2.7
**Linux CPU with Intel® MKL-DNN** Python 3.5
**Linux CPU with Intel® MKL-DNN** Python 3.6 | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild) | [1.10.0 py2.7](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp27-cp27mu-linux_x86_64.whl)
[1.10.0 py3.5](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp35-cp35m-linux_x86_64.whl)
[1.10.0 py3.6](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.10.0-cp36-cp36m-linux_x86_64.whl) ## For more information * [TensorFlow Website](https://www.tensorflow.org) diff --git a/configure.py b/configure.py index 89dc79b6b6bb168339d05182fd9da47dfc90ce54..b564da27227ec07713f91e925ea292b35f0f02df 100644 --- a/configure.py +++ b/configure.py @@ -497,7 +497,7 @@ def set_cc_opt_flags(environ_cp): elif is_windows(): default_cc_opt_flags = '/arch:AVX' else: - default_cc_opt_flags = '-march=native' + default_cc_opt_flags = '-march=native -Wno-sign-compare' question = ('Please specify optimization flags to use during compilation when' ' bazel option "--config=opt" is specified [Default is %s]: ' ) % default_cc_opt_flags @@ -1498,14 +1498,6 @@ def set_windows_build_flags(environ_cp): # TODO(pcloudy): Remove this flag when upgrading Bazel to 0.16.0 # Short object file path will be enabled by default. write_to_bazelrc('build --experimental_shortened_obj_file_path=true') - # When building zip file for some py_binary and py_test targets, don't - # include its dependencies. This is for: - # 1. Running python tests against the system installed TF pip package. - # 2. Avoiding redundant files in - # //tensorflow/tools/pip_package:simple_console_windows, - # which is a py_binary used during creating TF pip package. - # See https://github.com/tensorflow/tensorflow/issues/22390 - write_to_bazelrc('build --define=no_tensorflow_py_deps=true') if get_var( environ_cp, 'TF_OVERRIDE_EIGEN_STRONG_INLINE', 'Eigen strong inline', @@ -1560,13 +1552,11 @@ def main(): # TODO(ibiryukov): Investigate using clang as a cpu or cuda compiler on # Windows. 
environ_cp['TF_DOWNLOAD_CLANG'] = '0' - environ_cp['TF_ENABLE_XLA'] = '0' environ_cp['TF_NEED_MPI'] = '0' environ_cp['TF_SET_ANDROID_WORKSPACE'] = '0' if is_macos(): environ_cp['TF_NEED_TENSORRT'] = '0' - environ_cp['TF_ENABLE_XLA'] = '0' # The numpy package on ppc64le uses OpenBLAS which has multi-threading # issues that lead to incorrect answers. Set OMP_NUM_THREADS=1 at @@ -1575,10 +1565,9 @@ def main(): if is_ppc64le(): write_action_env_to_bazelrc('OMP_NUM_THREADS', 1) - set_build_var(environ_cp, 'TF_NEED_IGNITE', 'Apache Ignite', - 'with_ignite_support', True, 'ignite') + xla_enabled_by_default = is_linux() set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', - True, 'xla') + xla_enabled_by_default, 'xla') set_action_env_var(environ_cp, 'TF_NEED_OPENCL_SYCL', 'OpenCL SYCL', False) if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': @@ -1670,19 +1659,23 @@ def main(): create_android_ndk_rule(environ_cp) create_android_sdk_rule(environ_cp) - # On Windows, we don't have MKL support and the build is always monolithic. - # So no need to print the following message. - # TODO(pcloudy): remove the following if check when they make sense on Windows - if not is_windows(): - print('Preconfigured Bazel build configs. You can use any of the below by ' - 'adding "--config=<>" to your build command. See .bazelrc for more ' - 'details.') - config_info_line('mkl', 'Build with MKL support.') - config_info_line('monolithic', 'Config for mostly static monolithic build.') - config_info_line('gdr', 'Build with GDR support.') - config_info_line('verbs', 'Build with libverbs support.') - config_info_line('ngraph', 'Build with Intel nGraph support.') + print('Preconfigured Bazel build configs. You can use any of the below by ' + 'adding "--config=<>" to your build command. 
See .bazelrc for more ' + 'details.') + config_info_line('mkl', 'Build with MKL support.') + config_info_line('monolithic', 'Config for mostly static monolithic build.') + config_info_line('gdr', 'Build with GDR support.') + config_info_line('verbs', 'Build with libverbs support.') + config_info_line('ngraph', 'Build with Intel nGraph support.') + + print('Preconfigured Bazel build configs to DISABLE default on features:') + config_info_line('noaws', 'Disable AWS S3 filesystem support.') + config_info_line('nogcp', 'Disable GCP support.') + config_info_line('nohdfs', 'Disable HDFS support.') + config_info_line('noignite', 'Disable Apache Ignite support.') + config_info_line('nokafka', 'Disable Apache Kafka support.') if __name__ == '__main__': main() + diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 9b62a504525d5377d4836e92bdf0e46f7fc3ef38..77e3baaff198b402dc04daa1b11e4007b9906b23 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -209,12 +209,40 @@ config_setting( visibility = ["//visibility:public"], ) +# Features that are default ON are handled differently below. 
+# Each setting below matches --define=no_X_support=true, the value the build:noX configs in .bazelrc pass. +config_setting( + name = "no_aws_support", + define_values = {"no_aws_support": "true"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "no_gcp_support", + define_values = {"no_gcp_support": "true"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "no_hdfs_support", + define_values = {"no_hdfs_support": "true"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "no_ignite_support", + define_values = {"no_ignite_support": "true"}, + visibility = ["//visibility:public"], +) + config_setting( - name = "with_ignite_support", - define_values = {"with_ignite_support": "true"}, + name = "no_kafka_support", + define_values = {"no_kafka_support": "true"}, visibility = ["//visibility:public"], ) +# Crosses between platforms and file system libraries not supported on those +# platforms due to limitations in nested select() statements. config_setting( name = "with_cuda_support_windows_override", define_values = {"using_cuda_nvcc": "true"}, diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index b587e63227708427e7fae47f8f4a7b524d963ed9..c18b07603ae3841d3581741ab5a43f2e8b628356 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -411,6 +411,7 @@ tf_cc_test( srcs = ["gradients/nn_grad_test.cc"], deps = [ ":cc_ops", + ":cc_ops_internal", ":grad_op_registry", ":grad_testutil", ":gradient_checker", @@ -453,11 +454,33 @@ tf_cc_test( ], ) +# Generates separate libraries for array_ops and math_ops to reduce the dependency count of targets that depend on only these tf_gen_op_wrappers_cc( - name = "cc_ops", + name = "math_ops", + api_def_srcs = ["//tensorflow/core/api_def:base_api_def"], + op_lib_names = [ + "math_ops", + ], + pkg = "//tensorflow/core", +) + +tf_gen_op_wrappers_cc( + name = "array_ops", api_def_srcs = ["//tensorflow/core/api_def:base_api_def"], op_lib_names = [ "array_ops", + ], + pkg = "//tensorflow/core", +) + +tf_gen_op_wrappers_cc( + name = "cc_ops", + api_def_srcs = 
["//tensorflow/core/api_def:base_api_def"], + deps_internal = [ + ":array_ops_internal", + ":math_ops_internal", + ], + op_lib_names = [ "audio_ops", "candidate_sampling_ops", "control_flow_ops", @@ -468,7 +491,6 @@ tf_gen_op_wrappers_cc( "logging_ops", "lookup_ops", "manip_ops", - "math_ops", "nn_ops", "no_op", "parsing_ops", @@ -480,10 +502,21 @@ tf_gen_op_wrappers_cc( "user_ops", ], other_hdrs = [ + "ops/array_ops.h", "ops/const_op.h", + "ops/math_ops.h", "ops/standard_ops.h", ], + other_hdrs_internal = [ + "ops/array_ops_internal.h", + "ops/math_ops_internal.h", + ], pkg = "//tensorflow/core", + deps = [ + ":array_ops", + ":const_op", + ":math_ops", + ], ) tf_cc_test( diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 588e96cb196189780037f66266484962ba0385e4..2a32a2ed6f7862a29f4ce3d1aba5fdbc86adc670 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -143,6 +143,33 @@ Status Relu6GradHelper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("Relu6", Relu6GradHelper); +Status LeakyReluGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + float alpha; + TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha)); + internal::LeakyReluGrad::Attrs attrs; + auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), + attrs.Alpha(alpha)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("LeakyRelu", LeakyReluGradHelper); + +Status LeakyReluGradGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + float alpha; + TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha)); + internal::LeakyReluGrad::Attrs attrs; + auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(1), + attrs.Alpha(alpha)); + grad_outputs->push_back(dx); + grad_outputs->push_back(NoGradient()); + return 
scope.status(); +} +REGISTER_GRADIENT_OP("LeakyReluGrad", LeakyReluGradGradHelper); + Status EluGradHelper(const Scope& scope, const Operation& op, const std::vector& grad_inputs, std::vector* grad_outputs) { diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index aa72cf7ba2a958f54d50b59f0edaefb27edf0e86..f5a09e09dcda3e06c71d44d5fa5a1b121a9ade58 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/cc/framework/gradient_checker.h" #include "tensorflow/cc/framework/testutil.h" #include "tensorflow/cc/gradients/grad_testutil.h" +#include "tensorflow/cc/ops/nn_ops_internal.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -160,6 +161,32 @@ TEST_F(NNGradTest, Relu6Grad) { RunTest(x, x_init_value, y, shape); } +TEST_F(NNGradTest, LeakyReluGrad) { + TensorShape shape({5, 2}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + auto y = ops::internal::LeakyRelu(scope_, x); + // Avoid input values where Leaky ReLU gradient is not well defined (around + // zero). + Tensor x_init_value = test::AsTensor( + {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f}, + {5, 2}); + RunTest(x, x_init_value, y, shape); +} + +TEST_F(NNGradTest, LeakyReluGradGrad) { + TensorShape shape({5, 2}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + // Avoid input values where Leaky ReLU gradient is not well defined (around + // zero). 
+ Tensor x_init_value = test::AsTensor( + {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f}, {5, 2}); + Tensor features = test::AsTensor( + {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f}, + {5, 2}); + auto y = ops::internal::LeakyReluGrad(scope_, x, features); + RunTest(x, x_init_value, y, shape); +} + TEST_F(NNGradTest, EluGrad) { TensorShape shape({5, 2}); auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 661b444a42eefadf52739d84483e8e26c07fadf5..ced0cd03f74d147451ca2bf54108dc7517b50acd 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -324,7 +324,6 @@ cc_library( "//tensorflow/core:graph", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", - "@com_google_absl//absl/container:flat_hash_map", "@com_google_absl//absl/container:flat_hash_set", "@com_google_absl//absl/memory", "@com_google_absl//absl/strings", @@ -390,6 +389,7 @@ cc_library( "//tensorflow/cc:ops", "//tensorflow/cc:scope_internal", "//tensorflow/compiler/jit/graphcycles", + "//tensorflow/compiler/jit/legacy_flags:build_xla_ops_pass_flags", "//tensorflow/compiler/jit/legacy_flags:mark_for_compilation_pass_flags", "//tensorflow/compiler/jit/ops:xla_ops", "//tensorflow/compiler/tf2xla:dump_graph", @@ -499,6 +499,7 @@ tf_cc_test( ":compilation_passes", ":node_matchers", ":xla_cluster_util", + ":xla_cpu_device", ":xla_gpu_device", "//tensorflow/cc:cc_ops", "//tensorflow/cc:cc_ops_internal", @@ -510,6 +511,7 @@ tf_cc_test( "//tensorflow/compiler/tf2xla:test_util", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/tf2xla/cc:xla_jit_ops", + "//tensorflow/compiler/tf2xla/cc:xla_ops", "//tensorflow/compiler/tf2xla/kernels:xla_ops", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -621,6 +623,7 @@ cc_library( deps = [ "//tensorflow/cc:ops", "//tensorflow/compiler/xla:test", + "//tensorflow/core:framework", 
"//tensorflow/core:graph", "//tensorflow/core:protos_all_cc", "@com_google_absl//absl/algorithm:container", @@ -636,6 +639,7 @@ tf_cc_test( deps = [ ":node_matchers", "//tensorflow/cc:cc_ops", + "//tensorflow/cc:cc_ops_internal", "//tensorflow/cc:ops", "//tensorflow/core:ops", "//tensorflow/core:test_main", diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.cc b/tensorflow/compiler/jit/build_xla_ops_pass.cc index 5974696b7751d69eb27141173fdab14313925ee9..054f31ba3352b2215e6b0448c8ec8a70cb98b8e5 100644 --- a/tensorflow/compiler/jit/build_xla_ops_pass.cc +++ b/tensorflow/compiler/jit/build_xla_ops_pass.cc @@ -15,10 +15,16 @@ limitations under the License. #include "tensorflow/compiler/jit/build_xla_ops_pass.h" #include "absl/algorithm/container.h" +#include "absl/strings/str_cat.h" #include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/framework/scope_internal.h" +#include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/control_flow_ops.h" #include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" +#include "tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.h" +#include "tensorflow/compiler/jit/xla_cluster_util.h" #include "tensorflow/compiler/tf2xla/cc/ops/xla_jit_ops.h" #include "tensorflow/compiler/tf2xla/dump_graph.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" @@ -48,6 +54,88 @@ void MoveOutgoingEdges(Graph* g, Node* old_node, Node* new_node) { } } +// Returns a data value that is dead iff `control` is dead. +Output ControlToData(const Scope& scope, Node* control) { + Output data = ops::Const(scope.WithOpName("ctrl_as_data"), + Tensor(DT_BOOL, TensorShape({0}))); + scope.graph()->AddControlEdge(control, data.node()); + return Output(data.node()); +} + +// Returns an operation that can be control-depended on that is dead iff `data` +// is dead. 
+Operation DataToControl(const Scope& scope, Output data) { + return Operation( + ops::Identity(scope.WithOpName("data_as_ctrl"), data).node()); +} + +// Replaces each outgoing edge from `old_node` with a merge node that merges in +// the corresponding output from `new_node`. +void MergeOutgoingDataEdges(const Scope& s, Node* old_node, Node* new_node) { + if (!s.status().ok()) { + return; + } + + std::vector merged_outputs(old_node->num_outputs(), Output(nullptr)); + + std::vector data_edges; + absl::c_copy_if(old_node->out_edges(), std::back_inserter(data_edges), + [](const Edge* e) { return !e->IsControlEdge(); }); + + for (const Edge* e : data_edges) { + int oidx = e->src_output(); + Output merged_output = merged_outputs[oidx]; + if (merged_output.node() == nullptr) { + ops::Merge merge_op(s.WithOpName(absl::StrCat("merge_oidx_", oidx)), + {Output(old_node, oidx), Output(new_node, oidx)}); + merged_output = merged_outputs[oidx] = merge_op.output; + } + + Node* dst = e->dst(); + int dst_idx = e->dst_input(); + + s.graph()->RemoveEdge(e); + s.graph()->AddEdge(merged_output.node(), merged_output.index(), dst, + dst_idx); + } +} + +// Replaces each control successor of `old_node` to execute whenever either +// `old_node` or `new_node` is executed. +void MergeOutgoingControlEdges(const Scope& s, Node* old_node, Node* new_node) { + if (!s.status().ok()) { + return; + } + + std::vector ctrl_edges; + absl::c_copy_if(old_node->out_edges(), std::back_inserter(ctrl_edges), + [](const Edge* e) { return e->IsControlEdge(); }); + + if (ctrl_edges.empty()) { + return; + } + + // We can't merge control edges directly so we instead first "convert" them to + // normal values that can be merged, merge the values and then "convert" the + // merged value back into control. + // + // NB! We need to copy out the outgoing control edges before constructing + // old_ctrl_as_data otherwise the control edge from old_node to the constant + // in ControlToData will be present in ctrl_edges. 
+ + Output old_ctrl_as_data = ControlToData(s, old_node); + Output new_ctrl_as_data = ControlToData(s, new_node); + + ops::Merge ctrl_merge_as_data(s.WithOpName("ctrl_merge"), + {old_ctrl_as_data, new_ctrl_as_data}); + Operation ctrl_merge = DataToControl(s, ctrl_merge_as_data.output); + + for (const Edge* e : ctrl_edges) { + s.graph()->AddControlEdge(ctrl_merge.node(), e->dst()); + s.graph()->RemoveControlEdge(e); + } +} + struct XlaClusterInfo { std::vector constant_inputs; std::vector non_constant_inputs; @@ -107,7 +195,38 @@ Status CopyIncomingControlEdges(Graph* g, Node* from, Node* to) { return Status::OK(); } -Status ReplaceNodeWithXlaCompileAndXlaRun(Graph* g, Node* n) { +void RemoveAllIncomingControlEdges(Graph* g, Node* n) { + std::vector incoming_ctrl_edges; + absl::c_copy_if(n->in_edges(), std::back_inserter(incoming_ctrl_edges), + [](const Edge* e) { return e->IsControlEdge(); }); + for (const Edge* e : incoming_ctrl_edges) { + g->RemoveControlEdge(e); + } +} + +// Returns true (into `result`) if `node` must be compiled. 
+Status NodeRequiresCompilation(Node* n, bool* result) { + DeviceType device_type(""); + TF_RETURN_IF_ERROR( + DeviceToDeviceType(n->assigned_device_name(), &device_type)); + const XlaOpRegistry::DeviceRegistration* registration = nullptr; + if (!XlaOpRegistry::GetCompilationDevice(device_type.type(), &registration)) { + return errors::Internal("Could not find compilation device ", + device_type.type()); + } + *result = registration->requires_compilation; + return Status::OK(); +} + +Status ReplaceNodeWithXlaCompileAndXlaRun( + const FunctionLibraryDefinition& flib_def, bool lazy_compilation_enabled, + Graph* g, Node* n) { + bool requires_compilation; + TF_RETURN_IF_ERROR(NodeRequiresCompilation(n, &requires_compilation)); + if (!lazy_compilation_enabled) { + requires_compilation = true; + } + Status status; Scope root = NewInternalScope(g, &status, /*refiner=*/nullptr) .NewSubScope(n->name()) @@ -121,18 +240,63 @@ Status ReplaceNodeWithXlaCompileAndXlaRun(Graph* g, Node* n) { /*constants=*/cluster_info.constant_inputs, /*args=*/cluster_info.non_constant_inputs, /*resources=*/cluster_info.resource_inputs, + /*must_compile=*/requires_compilation, cluster_info.function); TF_RETURN_IF_ERROR( CopyIncomingControlEdges(g, /*from=*/n, /*to=*/xla_compile.key.node())); - std::vector<Output> xla_run_args = cluster_info.non_constant_inputs; - absl::c_copy(cluster_info.resource_inputs, std::back_inserter(xla_run_args)); - ops::_XlaRun xla_run(root.WithOpName("xla_run"), xla_run_args, - xla_compile.key, n->output_types()); + if (requires_compilation) { + // "Strict" compilation: every _XlaCompile invocation must compile the + // cluster. 
+ std::vector xla_run_args = cluster_info.non_constant_inputs; + absl::c_copy(cluster_info.resource_inputs, + std::back_inserter(xla_run_args)); + ops::_XlaRun xla_run(root.WithOpName("xla_run"), xla_run_args, + xla_compile.key, n->output_types()); + + MoveOutgoingEdges(g, /*old_node=*/n, + /*new_node=*/xla_run.operation.node()); + g->RemoveNode(n); + } else { + // "Lazy" compilation: an _XlaCompile invocation may decide not to compile + // the cluster based on profitability heuristics. - MoveOutgoingEdges(g, /*old_node=*/n, - /*new_node=*/xla_run.operation.node()); - g->RemoveNode(n); + // We generate the following graph: + // + // (use_tf_call, use_xla_run) = + // Switch(pred=xla_compile.compilation_successful, + // value=xla_compile.key) + // + // tf_call_outputs = cluster_N(..., ^use_tf_call) + // xla_run_outputs = _XlaRun(..., key=use_xla_run) + // outputs = Merge(tf_call_outputs, xla_run_outputs). + ops::Switch s(root.WithOpName("predicated_compilation_key"), + xla_compile.key, xla_compile.compilation_successful); + Output predicated_compilation_key = s.output_true; + Output inverse_predicated_compilation_key = s.output_false; + + std::vector xla_run_args = cluster_info.non_constant_inputs; + absl::c_copy(cluster_info.resource_inputs, + std::back_inserter(xla_run_args)); + ops::_XlaRun xla_run(root.WithOpName("xla_run"), xla_run_args, + predicated_compilation_key, n->output_types()); + + MergeOutgoingControlEdges(root, /*old_node=*/n, + /*new_node=*/xla_run.operation.node()); + + MergeOutgoingDataEdges(root, /*old_node=*/n, + /*new_node=*/xla_run.operation.node()); + + TF_RETURN_IF_ERROR(root.status()); + + // We already have a TensorFlow function call into the cluster -- the + // original node we set out to rewrite. We just wire in the correct control + // deps and we're done. 
+ RemoveAllIncomingControlEdges(g, n); + g->AddControlEdge( + DataToControl(root, inverse_predicated_compilation_key).node(), n); + n->ClearAttr(kXlaCompiledKernelAttr); + } return Status::OK(); } @@ -141,22 +305,34 @@ Status ReplaceNodeWithXlaCompileAndXlaRun(Graph* g, Node* n) { Status BuildXlaOpsPass::Run(const GraphOptimizationPassOptions& options) { Graph* graph = options.graph->get(); - for (Node* n : graph->op_nodes()) { - // In all cases, only try to compile computational nodes. - if (n->IsSend() || n->IsRecv() || n->IsControlFlow()) { - continue; - } + // Copy out the nodes we want to rewrite to avoid modifying the graph while we + // iterate on graph->op_nodes(). + std::vector xla_compiled_kernels; + absl::c_copy_if(graph->op_nodes(), std::back_inserter(xla_compiled_kernels), + [](const Node* n) { + if (n->IsSend() || n->IsRecv() || n->IsControlFlow()) { + return false; + } - // Only compile nodes that are marked for compilation by the - // compilation-marking pass (via 'attr_name'). - if (IsXlaCompiledKernel(*n)) { - TF_RETURN_IF_ERROR(ReplaceNodeWithXlaCompileAndXlaRun(graph, n)); - } + // Only compile nodes that are marked for compilation by the + // compilation-marking pass (via 'attr_name'). + return IsXlaCompiledKernel(*n); + }); + + bool lazy_compilation_enabled = enable_lazy_compilation_ + ? 
*enable_lazy_compilation_ + : legacy_flags::GetBuildXlaOpsPassFlags() + .tf_xla_enable_lazy_compilation; + + for (Node* n : xla_compiled_kernels) { + TF_RETURN_IF_ERROR(ReplaceNodeWithXlaCompileAndXlaRun( + *options.flib_def, lazy_compilation_enabled, graph, n)); } if (VLOG_IS_ON(1)) { dump_graph::DumpGraphToFile("build_xla_ops", *graph, options.flib_def); } + return Status::OK(); } } // namespace tensorflow diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.h b/tensorflow/compiler/jit/build_xla_ops_pass.h index 1dd38fa95186dfbe458166caa23a131fbe3c9510..58f7c4b3a0d1472f602e8234f9f08c23dfe78a34 100644 --- a/tensorflow/compiler/jit/build_xla_ops_pass.h +++ b/tensorflow/compiler/jit/build_xla_ops_pass.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_JIT_BUILD_XLA_OPS_PASS_H_ #define TENSORFLOW_COMPILER_JIT_BUILD_XLA_OPS_PASS_H_ +#include "absl/types/optional.h" #include "tensorflow/core/common_runtime/optimization_registry.h" #include "tensorflow/core/lib/core/status.h" @@ -25,7 +26,17 @@ namespace tensorflow { // executes (using XLA) TF function calls marked with "_XlaCompiledKernel". class BuildXlaOpsPass : public GraphOptimizationPass { public: + // If enable_lazy_compilation is not nullopt then *enable_lazy_compilation + // overrides --tf_xla_enable_lazy_compilation flag in deciding whether lazy + // compilation is enabled. 
+ explicit BuildXlaOpsPass( + absl::optional enable_lazy_compilation = absl::nullopt) + : enable_lazy_compilation_(enable_lazy_compilation) {} + Status Run(const GraphOptimizationPassOptions& options) override; + + private: + absl::optional enable_lazy_compilation_; }; } // namespace tensorflow diff --git a/tensorflow/compiler/jit/build_xla_ops_pass_test.cc b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc index 9d56db7b6bc12938b2de9df02b97ff0ca6a42e54..11df946cc186660242574c2644463a26ead44f1f 100644 --- a/tensorflow/compiler/jit/build_xla_ops_pass_test.cc +++ b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc @@ -22,18 +22,44 @@ limitations under the License. #include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" #include "tensorflow/compiler/jit/node_matchers.h" +#include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/grappler/optimizers/data/graph_utils.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session_options.h" namespace tensorflow { namespace { +class BuildXlaOpsTest : public ::testing::Test { + protected: + void SetUp() override { + // This is needed to register the XLA_* devices. 
+ CHECK(DeviceFactory::AddDevices( + SessionOptions(), "/job:localhost/replica:0/task:0", &devices_) + .ok()); + } + + void TearDown() override { + for (Device* device : devices_) { + delete device; + } + } + + private: + std::vector devices_; +}; + using ::tensorflow::testing::FindNodeByName; +using ::tensorflow::testing::matchers::Attr; using ::tensorflow::testing::matchers::CtrlDeps; +using ::tensorflow::testing::matchers::Inputs; using ::tensorflow::testing::matchers::NodeWith; using ::tensorflow::testing::matchers::Op; +using ::tensorflow::testing::matchers::Out; +using ::testing::_; Status BuildXlaOps(const Scope& s, std::unique_ptr* result) { auto graph = absl::make_unique(OpRegistry::Global()); @@ -42,15 +68,18 @@ Status BuildXlaOps(const Scope& s, std::unique_ptr* result) { // Assign all nodes to the CPU device. static const char* kCpuDevice = "/job:localhost/replica:0/task:0/cpu:0"; for (Node* n : graph->nodes()) { - if (n->assigned_device_name().empty()) { + if (n->requested_device().empty()) { n->set_assigned_device_name(kCpuDevice); + } else { + n->set_assigned_device_name(n->requested_device()); } } GraphOptimizationPassOptions opt_options; opt_options.graph = &graph; - BuildXlaOpsPass pass; + BuildXlaOpsPass pass(/*enable_lazy_compilation=*/true); TF_RETURN_IF_ERROR(pass.Run(opt_options)); + VLOG(3) << graph->ToGraphDefDebug().DebugString(); *result = std::move(graph); return Status::OK(); } @@ -76,16 +105,19 @@ Status MakeXlaCompiledKernel(Graph* graph, const string& callee_name, result); } -Node* MakeWrite(const Scope& scope, const string& id) { - Output var_handle = - ops::VarHandleOp(scope.WithOpName("Var" + id), DT_FLOAT, TensorShape({})); - Output value_to_write = - ops::Const(scope.WithOpName("ValueToAssign" + id), 1.0f); - ops::AssignVariableOp assign_op(scope.WithOpName("Assignee" + id), var_handle, - value_to_write); +Node* MakeWrite(const Scope& scope, Output value_to_write, const string& id) { + Output var_handle = 
ops::VarHandleOp(scope.WithOpName("Var_" + id), DT_FLOAT, + TensorShape({})); + ops::AssignVariableOp assign_op(scope.WithOpName("Assignee_" + id), + var_handle, value_to_write); return assign_op.operation.node(); } +Node* MakeWrite(const Scope& scope, const string& id) { + return MakeWrite( + scope, ops::Const(scope.WithOpName("ValueToAssign" + id), 1.0f), id); +} + FunctionDefLibrary CreateFunctionDefLibWithConstFunction(const string& name) { FunctionDefLibrary flib_def; FunctionDef func = FunctionDefHelper::Create( @@ -97,14 +129,16 @@ FunctionDefLibrary CreateFunctionDefLibWithConstFunction(const string& name) { return flib_def; } -TEST(BuildXlaOps, ControlDepsPreserved) { - Scope root = Scope::NewRootScope().ExitOnError(); +TEST_F(BuildXlaOpsTest, ControlDepsPreserved) { + const char* kXlaDeviceName = "/job:worker/replica:0/task:0/device:XLA_CPU:0"; + Scope root = Scope::NewRootScope().WithDevice(kXlaDeviceName).ExitOnError(); FunctionDefLibrary flib_def = CreateFunctionDefLibWithConstFunction("cluster_0"); TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def)); Node* call; TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", &call)); + call->set_requested_device(kXlaDeviceName); Node* write_op = MakeWrite(root, "write"); root.graph()->AddControlEdge(call, write_op); @@ -116,15 +150,17 @@ TEST(BuildXlaOps, ControlDepsPreserved) { EXPECT_THAT(write_op_new, NodeWith(CtrlDeps(NodeWith(Op("_XlaRun"))))); } -TEST(BuildXlaOps, CleanFailureOnBogusAttr) { +TEST_F(BuildXlaOpsTest, CleanFailureOnBogusAttr) { Scope root = Scope::NewRootScope().ExitOnError(); FunctionDefLibrary flib_def = CreateFunctionDefLibWithConstFunction("cluster_0"); TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def)); + Node* call; TF_ASSERT_OK( MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", 100, 100, &call)); + Node* write_op = MakeWrite(root, "write"); root.graph()->AddControlEdge(call, write_op); @@ -134,5 +170,65 @@ TEST(BuildXlaOps, CleanFailureOnBogusAttr) { 
EXPECT_EQ(failure_status.code(), error::INVALID_ARGUMENT); } +TEST_F(BuildXlaOpsTest, OnNonXlaDevice) { + Scope root = Scope::NewRootScope().ExitOnError(); + + FunctionDefLibrary flib_def = + CreateFunctionDefLibWithConstFunction("cluster_0"); + TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def)); + + Node* call; + TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", &call)); + TF_ASSERT_OK(root.DoShapeInference(call)); + + Node* write_op = MakeWrite(root, Output(call), "write_result"); + + auto xla_compile = NodeWith(Op("_XlaCompile"), Attr("must_compile", false)); + auto predicated_compilation_key = + NodeWith(Op("Switch"), Inputs(Out(0, xla_compile), Out(1, xla_compile))); + auto xla_run = + NodeWith(Op("_XlaRun"), Inputs(Out(1, predicated_compilation_key))); + auto tf_call = + NodeWith(Op("cluster_0"), + CtrlDeps(NodeWith(Op("Identity"), + Inputs(Out(0, predicated_compilation_key))))); + auto merge = NodeWith(Op("Merge"), Inputs(Out(tf_call), Out(xla_run))); + auto assign_var = NodeWith(Op("AssignVariableOp"), Inputs(_, Out(merge))); + + std::unique_ptr graph; + TF_ASSERT_OK(BuildXlaOps(root, &graph)); + + Node* write_op_new = FindNodeByName(graph.get(), write_op->name()); + ASSERT_NE(write_op_new, nullptr); + EXPECT_THAT(write_op_new, assign_var); +} + +TEST_F(BuildXlaOpsTest, OnXlaDevice) { + const char* kXlaDeviceName = "/job:worker/replica:0/task:0/device:XLA_CPU:0"; + Scope root = Scope::NewRootScope().WithDevice(kXlaDeviceName).ExitOnError(); + + FunctionDefLibrary flib_def = + CreateFunctionDefLibWithConstFunction("cluster_0"); + TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def)); + + Node* call; + TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", &call)); + call->set_requested_device(kXlaDeviceName); + TF_ASSERT_OK(root.DoShapeInference(call)); + + Node* write_op = MakeWrite(root, Output(call), "write_result"); + + std::unique_ptr graph; + TF_ASSERT_OK(BuildXlaOps(root, &graph)); + + auto xla_op = + 
NodeWith(Op("_XlaRun"), Inputs(Out(NodeWith(Op("_XlaCompile"))))); + auto assign_var = + NodeWith(Op("AssignVariableOp"), Inputs(Out(NodeWith()), Out(xla_op))); + + Node* write_op_new = FindNodeByName(graph.get(), write_op->name()); + ASSERT_NE(write_op_new, nullptr); + EXPECT_THAT(write_op_new, assign_var); +} } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index da27f837e88fc3f57f865211929ec9cb1a1af779..da030b3bcc7aacae2306bec30f4b8927aa042d7c 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -1109,6 +1109,9 @@ Status Encapsulator::Subgraph::BuildFunctionDef( function_def_name_ = name; FunctionDef fdef; + // Verify that the graph has well-formed control flow structure. + std::vector dummy; + TF_RETURN_IF_ERROR(BuildControlFlowInfo(graph_.get(), &dummy)); TF_RETURN_IF_ERROR(GraphToFunctionDef(*graph_, name, &fdef)); if (VLOG_IS_ON(1)) { @@ -1531,9 +1534,6 @@ Status Encapsulator::SplitIntoSubgraphs(FunctionLibraryDefinition* library) { for (auto& entry : subgraphs_) { Subgraph& subgraph = entry.second; FixupSourceAndSinkEdges(subgraph.GetGraph()); - // Verify that the graph has well-formed control flow structure. 
- std::vector dummy; - TF_RETURN_IF_ERROR(BuildControlFlowInfo(subgraph.GetGraph(), &dummy)); } if (VLOG_IS_ON(1)) { diff --git a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc index 22531a4acea3f130175c7cb2e03fcb7570926094..192e1c7b32467d80cef6ff61a1c7078f8dea9dfb 100644 --- a/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc +++ b/tensorflow/compiler/jit/encapsulate_xla_computations_pass_test.cc @@ -256,7 +256,7 @@ TEST(EncapsulateXlaComputations, Encapsulate) { TF_ASSERT_OK(EncapsulateXlaComputationsPass::Encapsulate(&graph, &flib_def)); - std::unordered_map index = BuildNodeIndex(*graph); + std::unordered_map index = graph->BuildNodeNameIndex(); string function = index.at("launch0")->type_string(); // Tests the outer graph is as expected. @@ -291,7 +291,8 @@ TEST(EncapsulateXlaComputations, Encapsulate) { // function. Encapsulation should be deterministic to avoid recompilation. TF_ASSERT_OK( EncapsulateXlaComputationsPass::Encapsulate(&graph_copy, &flib_def)); - std::unordered_map index_copy = BuildNodeIndex(*graph_copy); + std::unordered_map index_copy = + graph_copy->BuildNodeNameIndex(); string function_copy = index_copy.at("launch0")->type_string(); EXPECT_EQ(function, function_copy); } diff --git a/tensorflow/compiler/jit/kernels/xla_ops.cc b/tensorflow/compiler/jit/kernels/xla_ops.cc index accc86a86d9d3eca741994ee502bd7580ce49b2e..2268d9042860f6556cb69469ee52ad7cbbb81954 100644 --- a/tensorflow/compiler/jit/kernels/xla_ops.cc +++ b/tensorflow/compiler/jit/kernels/xla_ops.cc @@ -219,7 +219,7 @@ static Status BuildCompilationCache(OpKernelContext* ctx, static Status CompileToLocalExecutable( OpKernelContext* ctx, const NameAttrList& function, const XlaPlatformInfo& platform_info, absl::Span resources, - absl::Span constants, xla::LocalClient** client, + absl::Span constants, bool lazy, xla::LocalClient** client, std::map* variables, const 
XlaCompiler::CompilationResult** kernel, xla::LocalExecutable** executable) { @@ -277,7 +277,10 @@ static Status CompileToLocalExecutable( compile_options.always_return_tuple = false; return cache->Compile(options, function, constant_args, *variables, ctx, - compile_options, kernel, executable); + compile_options, + lazy ? XlaCompilationCache::CompileMode::kLazy + : XlaCompilationCache::CompileMode::kStrict, + kernel, executable); } void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { @@ -291,8 +294,8 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK( ctx, CompileToLocalExecutable(ctx, function_, platform_info_, resources_, - constants_, &client, &variables, &kernel, - &executable)); + constants_, /*lazy=*/false, &client, + &variables, &kernel, &executable)); se::Stream* stream = ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; @@ -394,9 +397,12 @@ XlaCompileOp::XlaCompileOp(OpKernelConstruction* ctx) resources_(ResourcesVector(ctx)), function_(FunctionAttr(ctx)) { OP_REQUIRES_OK(ctx, PlatformInfoFromContext(ctx, &platform_info_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("must_compile", &must_compile_)); } void XlaCompileOp::Compute(OpKernelContext* ctx) { + VLOG(3) << "XlaCompileOp " << def().name() + << (must_compile_ ? 
"(must-compile)" : ""); xla::LocalClient* client; const XlaCompiler::CompilationResult* kernel; xla::LocalExecutable* executable; @@ -404,8 +410,24 @@ void XlaCompileOp::Compute(OpKernelContext* ctx) { OP_REQUIRES_OK( ctx, CompileToLocalExecutable(ctx, function_, platform_info_, resources_, - constants_, &client, &variables, &kernel, - &executable)); + constants_, /*lazy=*/!must_compile_, + &client, &variables, &kernel, &executable)); + + AllocatorAttributes host_alloc_attrs; + host_alloc_attrs.set_gpu_compatible(true); + host_alloc_attrs.set_on_host(true); + Allocator* cpu_allocator = ctx->device()->GetAllocator(host_alloc_attrs); + + if (!executable) { + DCHECK(!must_compile_); + Tensor compilation_key(cpu_allocator, DT_STRING, TensorShape({})); + + Tensor compilation_successful(cpu_allocator, DT_BOOL, TensorShape({})); + compilation_successful.scalar()() = false; + ctx->set_output(0, Tensor(cpu_allocator, DT_STRING, TensorShape({}))); + ctx->set_output(1, compilation_successful); + return; + } // Each execution of an XlaCompile op creates a new XlaExecutableClosure, even // if it didn't have to compile the cluster because of a compilation-cache @@ -415,13 +437,6 @@ void XlaCompileOp::Compute(OpKernelContext* ctx) { XlaExecutableClosureStore::Global()->Produce(XlaExecutableClosure( client, executable, kernel, std::move(variables), constants_.size())); - Allocator* cpu_allocator = [&] { - AllocatorAttributes host_alloc_attrs; - host_alloc_attrs.set_gpu_compatible(true); - host_alloc_attrs.set_on_host(true); - return ctx->device()->GetAllocator(host_alloc_attrs); - }(); - Tensor compilation_key(cpu_allocator, DT_STRING, TensorShape({})); compilation_key.flat()(0) = key; @@ -437,6 +452,7 @@ XlaRunOp::XlaRunOp(OpKernelConstruction* ctx) : OpKernel(ctx) { } void XlaRunOp::Compute(OpKernelContext* ctx) { + VLOG(3) << "XlaRunOp " << def().name(); Tensor key_tensor = ctx->input(ctx->num_inputs() - 1); const XlaExecutableClosureStore::KeyT& key = key_tensor.flat()(0); @@ 
-491,6 +507,8 @@ REGISTER_KERNEL_BUILDER(Name("_XlaCompile").Device(DEVICE_CPU), XlaCompileOp); REGISTER_KERNEL_BUILDER(Name("_XlaCompile") .Device(DEVICE_GPU) .HostMemory("constants") + .HostMemory("key") + .HostMemory("compilation_successful") .HostMemory("resources"), XlaCompileOp); diff --git a/tensorflow/compiler/jit/kernels/xla_ops.h b/tensorflow/compiler/jit/kernels/xla_ops.h index 489d26eb30a66646158f39ea3fc6f55759c7f88e..ac90837e0d90943b93e2cdb01a30fa0837ba94df 100644 --- a/tensorflow/compiler/jit/kernels/xla_ops.h +++ b/tensorflow/compiler/jit/kernels/xla_ops.h @@ -151,6 +151,8 @@ class XlaCompileOp : public OpKernel { NameAttrList function_; XlaPlatformInfo platform_info_; + + bool must_compile_; }; class XlaRunOp : public OpKernel { diff --git a/tensorflow/compiler/jit/legacy_flags/BUILD b/tensorflow/compiler/jit/legacy_flags/BUILD index 07c5b2318851ed506711b9ee00c66fe680a3afd8..d8fe4026f51d8aa4b027aeedf0795ad30e28d986 100644 --- a/tensorflow/compiler/jit/legacy_flags/BUILD +++ b/tensorflow/compiler/jit/legacy_flags/BUILD @@ -39,3 +39,15 @@ cc_library( "//tensorflow/core:lib", ], ) + +cc_library( + name = "build_xla_ops_pass_flags", + srcs = ["build_xla_ops_pass_flags.cc"], + hdrs = ["build_xla_ops_pass_flags.h"], + deps = + [ + "//tensorflow/compiler/xla/legacy_flags:parse_flags_from_env", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + ], +) diff --git a/tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.cc b/tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.cc new file mode 100644 index 0000000000000000000000000000000000000000..58157d2b9800a2e8269533607c2ea688ff4e7766 --- /dev/null +++ b/tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.cc @@ -0,0 +1,47 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include // NOLINT + +#include "tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.h" +#include "tensorflow/compiler/xla/legacy_flags/parse_flags_from_env.h" +#include "tensorflow/core/util/command_line_flags.h" + +namespace tensorflow { +namespace legacy_flags { +namespace { + +BuildXlaOpsPassFlags* flags; +std::vector* flag_list; +std::once_flag flags_init; + +void AllocateAndParseFlags() { + flags = new BuildXlaOpsPassFlags; + flags->tf_xla_enable_lazy_compilation = false; + flag_list = new std::vector({ + Flag("tf_xla_enable_lazy_compilation", + &flags->tf_xla_enable_lazy_compilation, ""), + }); + xla::legacy_flags::ParseFlagsFromEnv(*flag_list); +} + +} // namespace + +const BuildXlaOpsPassFlags& GetBuildXlaOpsPassFlags() { + std::call_once(flags_init, &AllocateAndParseFlags); + return *flags; +} +} // namespace legacy_flags +} // namespace tensorflow diff --git a/tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.h b/tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.h new file mode 100644 index 0000000000000000000000000000000000000000..539314cbf72d38ed973b8a526aa6424b19ef344d --- /dev/null +++ b/tensorflow/compiler/jit/legacy_flags/build_xla_ops_pass_flags.h @@ -0,0 +1,37 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_JIT_LEGACY_FLAGS_BUILD_XLA_OPS_PASS_FLAGS_H_ +#define TENSORFLOW_COMPILER_JIT_LEGACY_FLAGS_BUILD_XLA_OPS_PASS_FLAGS_H_ + +namespace tensorflow { +namespace legacy_flags { + +// Flags for the build_xla_ops pass. +struct BuildXlaOpsPassFlags { + // Enables lazy compilation for TF/XLA (only when auto-clustering) if true. + // Defaults to false. + bool tf_xla_enable_lazy_compilation; +}; + +// Parses the flags in BuildXlaOpsPassFlags from the TF_XLA_FLAGS environment +// variable and returns a reference to the parsed copy. Parses TF_XLA_FLAGS +// only the first time this routine is called. +const BuildXlaOpsPassFlags& GetBuildXlaOpsPassFlags(); + +} // namespace legacy_flags +} // namespace tensorflow + +#endif // TENSORFLOW_COMPILER_JIT_LEGACY_FLAGS_BUILD_XLA_OPS_PASS_FLAGS_H_ diff --git a/tensorflow/compiler/jit/node_matchers.cc b/tensorflow/compiler/jit/node_matchers.cc index d8ace628e6b76e011ecddd4d526efc4db9c9237e..a09a6eb1553cb4bcf5587a7602097a40b64cfcdf 100644 --- a/tensorflow/compiler/jit/node_matchers.cc +++ b/tensorflow/compiler/jit/node_matchers.cc @@ -19,7 +19,10 @@ limitations under the License. 
#include "absl/algorithm/container.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" +#include "absl/strings/str_replace.h" #include "absl/strings/str_split.h" +#include "tensorflow/core/framework/attr_value_util.h" +#include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/tensor.pb.h" namespace tensorflow { @@ -28,6 +31,7 @@ namespace matchers { namespace { using impl::NodeMatcherProperties; +using impl::OutEdge; string IndentAllButFirstLine(absl::string_view text) { std::vector lines = absl::StrSplit(text, '\n'); @@ -99,8 +103,6 @@ bool MatchAndExplainTensor(const Tensor& tensor, const Tensor& expected_tensor, } } -using Input = std::pair; - struct NodeMatcher : public ::testing::MatcherInterface { bool MatchAndExplain( const Node* node, @@ -191,6 +193,29 @@ struct NodeMatcher : public ::testing::MatcherInterface { } return false; } + + const AttrValueMap& attr_value_map = node->def().attr(); // No copy needed. + for (const auto& attr_kv_pair : attrs) { + auto it = attr_value_map.find(attr_kv_pair.first); + if (it == attr_value_map.end()) { + if (listener->IsInterested()) { + *listener << "did not find attribute named \"" << attr_kv_pair.first + << "\" in node"; + } + return false; + } + if (!AreAttrValuesEqual(it->second, attr_kv_pair.second)) { + if (listener->IsInterested()) { + *listener << "attribute named " << attr_kv_pair.first + << " does not match value; expected: \"" + << SummarizeAttrValue(attr_kv_pair.second) + << "\", found: \"" << SummarizeAttrValue(it->second) + << "\""; + } + return false; + } + } + return true; } @@ -232,7 +257,7 @@ struct NodeMatcher : public ::testing::MatcherInterface { *os << "matching " << ss.str(); } else { int edge_idx = 0; - for (const ::testing::Matcher& matcher : (*input_matchers)) { + for (const ::testing::Matcher& matcher : (*input_matchers)) { *os << "\n [" << edge_idx << "] matching ("; ::std::stringstream ss; matcher.DescribeTo(&ss); @@ -250,6 +275,19 @@ struct NodeMatcher : public
::testing::MatcherInterface { control_dep_set->DescribeTo(os); } + if (!attrs.empty()) { + printed_something = true; + std::vector attrs_str; + absl::c_transform(attrs, std::back_inserter(attrs_str), + [](const std::pair& attr_kv_pair) { + return absl::StrCat( + attr_kv_pair.first, "->", + SummarizeAttrValue(attr_kv_pair.second)); + }); + *os << " and attr values matching [" << absl::StrJoin(attrs_str, ", ") + << "]"; + } + if (!printed_something) { *os << "is any node"; } @@ -266,7 +304,7 @@ struct NodeMatcher : public ::testing::MatcherInterface { } ::testing::StringMatchResultListener inner_listener; - Input input = {edge->src(), edge->src_output()}; + OutEdge input = {edge->src(), edge->src_output()}; if ((*input_matchers)[input_idx].MatchAndExplain(input, &inner_listener)) { return true; } @@ -286,22 +324,24 @@ struct NodeMatcher : public ::testing::MatcherInterface { absl::optional name; absl::optional assigned_device; absl::optional constant_value; - absl::optional>> input_matchers; + absl::optional>> input_matchers; absl::optional<::testing::Matcher>> control_dep_set; + std::map attrs; }; // Matches a dst and dst_output on an input edge. Today we only use this with // dst_output=0 but we will eventually need to support multi-output operations. 
-class InputMatcher : public ::testing::MatcherInterface { +class OutEdgeMatcher : public ::testing::MatcherInterface { public: - InputMatcher(::testing::Matcher src_matcher, int src_output) - : src_matcher_(std::move(src_matcher)), src_output_(src_output) {} + OutEdgeMatcher(::testing::Matcher src_matcher, int src_oidx) + : src_matcher_(std::move(src_matcher)), src_oidx_(src_oidx) {} bool MatchAndExplain( - Input input, ::testing::MatchResultListener* listener) const override { + OutEdge out_edge, + ::testing::MatchResultListener* listener) const override { ::testing::StringMatchResultListener inner_listener; - if (!src_matcher_.MatchAndExplain(input.first, &inner_listener)) { + if (!src_matcher_.MatchAndExplain(out_edge.first, &inner_listener)) { if (listener->IsInterested()) { *listener << "\nsource does not match expected "; src_matcher_.DescribeTo(listener->stream()); @@ -312,10 +352,10 @@ class InputMatcher : public ::testing::MatcherInterface { } return false; } - if (input.second != src_output_) { + if (out_edge.second != src_oidx_) { if (listener->IsInterested()) { - *listener << "\nexpected output slot to be " << src_output_ - << " but found " << input.second; + *listener << "\nexpected output slot to be " << src_oidx_ + << " but found " << out_edge.second; } return false; } @@ -324,31 +364,21 @@ class InputMatcher : public ::testing::MatcherInterface { } void DescribeTo(::std::ostream* os) const override { - if (src_output_) { - *os << "output slot: " << src_output_ << ", source: ("; + if (src_oidx_) { + *os << "output slot: " << src_oidx_ << ", source: ("; } src_matcher_.DescribeTo(os); - if (src_output_) { + if (src_oidx_) { *os << ")"; } } private: ::testing::Matcher src_matcher_; - int src_output_; + int src_oidx_; }; - -std::vector<::testing::Matcher> NodeMatchersToInputMatchers( - absl::Span> node_matchers) { - std::vector<::testing::Matcher> result; - absl::c_transform(node_matchers, std::back_inserter(result), - [](::testing::Matcher n) { - 
return ::testing::MakeMatcher(new InputMatcher(n, 0)); - }); - return result; -} } // namespace ::testing::Matcher impl::NodeWith( @@ -375,10 +405,9 @@ std::vector<::testing::Matcher> NodeMatchersToInputMatchers( matcher->assigned_device = prop.assigned_device(); } - if (prop.input_nodes()) { + if (prop.inputs()) { DCHECK(!matcher->input_matchers); - matcher->input_matchers = - NodeMatchersToInputMatchers(*prop.input_nodes()); + matcher->input_matchers = *prop.inputs(); } if (prop.control_deps()) { @@ -386,6 +415,11 @@ std::vector<::testing::Matcher> NodeMatchersToInputMatchers( matcher->control_dep_set = ::testing::UnorderedElementsAreArray(*prop.control_deps()); } + + if (prop.attr()) { + auto insert_result = matcher->attrs.insert(*prop.attr()); + DCHECK(insert_result.second); + } } return ::testing::MakeMatcher(matcher); @@ -412,12 +446,12 @@ impl::NodeMatcherProperties AssignedDevice(string assigned_device) { } impl::NodeMatcherProperties impl::Inputs( - absl::Span> inputs) { - std::vector<::testing::Matcher> inputs_vector; + absl::Span> inputs) { + std::vector<::testing::Matcher> inputs_vector; absl::c_copy(inputs, std::back_inserter(inputs_vector)); impl::NodeMatcherProperties props; - props.set_input_nodes(std::move(inputs_vector)); + props.set_inputs(std::move(inputs_vector)); return props; } @@ -431,6 +465,19 @@ impl::NodeMatcherProperties impl::CtrlDeps( return props; } +std::pair impl::AttrLiteralHelper( + const std::pair& bool_attr) { + AttrValue attr_value; + attr_value.set_b(bool_attr.second); + return {bool_attr.first, attr_value}; +} + +impl::NodeMatcherProperties impl::Attr(std::pair attr) { + impl::NodeMatcherProperties props; + props.set_attr(std::move(attr)); + return props; +} + NodeMatcherProperties ConstantValue( const ::tensorflow::Input::Initializer& val) { TF_CHECK_OK(val.status); @@ -443,6 +490,10 @@ NodeMatcherProperties ConstantValue( const ::tensorflow::Input::Initializer& val) { return NodeWith(ConstantValue(val)); } 
+::testing::Matcher Out( + int oidx, ::testing::Matcher node_matcher) { + return ::testing::MakeMatcher(new OutEdgeMatcher(node_matcher, oidx)); +} } // namespace matchers Node* FindNodeByName(Graph* g, absl::string_view name) { @@ -455,4 +506,7 @@ Node* FindNodeByName(Graph* g, absl::string_view name) { return nullptr; } } // namespace testing + +void PrintTo(const Node* n, ::std::ostream* os) { *os << SummarizeNode(*n); } +void PrintTo(Node* n, ::std::ostream* os) { *os << SummarizeNode(*n); } } // namespace tensorflow diff --git a/tensorflow/compiler/jit/node_matchers.h b/tensorflow/compiler/jit/node_matchers.h index 0437a7e95c1eb3bdcdbe24a440dd90a5943c0894..35c2f5fd7b533d0e8716dc6c70c21afe9a32c9c8 100644 --- a/tensorflow/compiler/jit/node_matchers.h +++ b/tensorflow/compiler/jit/node_matchers.h @@ -19,7 +19,7 @@ limitations under the License. // // tensorflow::Node* node = ...; // EXPECT_THAT(node, NodeWith(Name("name"), Op("op"), -// Inputs(NodeWith(Name("input"))))) +// Inputs(Out(3, NodeWith(Name("input")))))) // // Matchable node properties (the expressions that go inside NodeWith(...)) // are: @@ -32,7 +32,8 @@ limitations under the License. // - AssignedDevice(string): matches the assigned device exactly. // // - Inputs(): matches the list of non-control inputs to the node -// exactly (i.e. does not match a suffix or a prefix). +// exactly (i.e. does not match a suffix or a prefix) where each element +// matches an output of a node (see Out(idx, node) below). // // - CtrlDeps(): matches the list of control dependences on the // node exactly but in any order. @@ -40,10 +41,16 @@ limitations under the License. // - ConstantValue(tensorflow::Input::Initializer init): matches a Const node // with the constant value `init`. Implies Op("Const"). // -// Node properties may not be repeated in a single NodeWith(...) matcher. -// E.g. NodeWith(Op("Foo"), Op("Bar")) will CHECK-fail. 
Since ConstantValue -// implies Op("Const"), a single NodeWith matcher can't have both -// ConstantValue(...) and Op(...). +// - Attr(name, value): Matches a single attribute with name `name` and value +// `value`. Right now only boolean values are supported. +// +// Overlapping node properties may not be repeated in a single NodeWith(...) +// matcher. E.g. NodeWith(Op("Foo"), Op("Bar")) will CHECK-fail. Since +// ConstantValue implies Op("Const"), a single NodeWith matcher can't have both +// ConstantValue(...) and Op(...). Multiple Attr() values can be combined as +// long as the attribute names are different. +// +// Out(idx, node) matches the `idx`'th output of a node that matches `node`. #ifndef TENSORFLOW_COMPILER_JIT_NODE_MATCHERS_H_ #define TENSORFLOW_COMPILER_JIT_NODE_MATCHERS_H_ @@ -66,6 +73,8 @@ namespace matchers { namespace impl { +using OutEdge = std::pair; + // ----------------------------------------------------------------------------- // Implementation details. @@ -74,6 +83,8 @@ namespace impl { class NodeMatcherProperties { public: using NodeSeqMatcher = std::vector<::testing::Matcher>; + using InputSeqMatcher = std::vector<::testing::Matcher>; + using AttrKeyValuePair = std::pair; const absl::optional& name() const { return name_; } const absl::optional& op() const { return op_; } @@ -83,12 +94,13 @@ class NodeMatcherProperties { const absl::optional& constant_value() const { return constant_value_; } - const absl::optional& input_nodes() const { - return input_nodes_; + const absl::optional& inputs() const { + return input_matchers_; } const absl::optional& control_deps() const { return control_deps_; } + const absl::optional& attr() const { return attr_; } void set_name(string name) { DCHECK(IsEmpty()); @@ -111,9 +123,9 @@ class NodeMatcherProperties { op_ = "Const"; } - void set_input_nodes(NodeSeqMatcher input_nodes) { + void set_inputs(InputSeqMatcher inputs) { DCHECK(IsEmpty()); - input_nodes_ = std::move(input_nodes); + input_matchers_ = 
std::move(inputs); } void set_control_deps(NodeSeqMatcher control_deps) { @@ -121,9 +133,14 @@ class NodeMatcherProperties { control_deps_ = std::move(control_deps); } + void set_attr(AttrKeyValuePair attr) { + DCHECK(IsEmpty()); + attr_ = std::move(attr); + } + bool IsEmpty() const { - return !name().has_value() && !op().has_value() && - !input_nodes().has_value() && !control_deps().has_value(); + return !name().has_value() && !op().has_value() && !inputs().has_value() && + !control_deps().has_value() && !attr().has_value(); } private: @@ -131,18 +148,24 @@ class NodeMatcherProperties { absl::optional op_; absl::optional assigned_device_; absl::optional constant_value_; - absl::optional input_nodes_; + absl::optional input_matchers_; absl::optional control_deps_; + absl::optional attr_; }; ::testing::Matcher NodeWith( absl::Span props); impl::NodeMatcherProperties Inputs( - absl::Span> inputs); + absl::Span> inputs); impl::NodeMatcherProperties CtrlDeps( absl::Span> control_deps); + +impl::NodeMatcherProperties Attr(std::pair attrs); + +std::pair AttrLiteralHelper( + const std::pair& bool_attr); } // namespace impl // ----------------------------------------------------------------------------- @@ -157,6 +180,13 @@ impl::NodeMatcherProperties Op(string op); // Matches a node with assigned device `assigned_device`. impl::NodeMatcherProperties AssignedDevice(string assigned_device); +// Matches a node with a boolean typed attribute named `name` and with value +// `value`. +template +impl::NodeMatcherProperties Attr(const string& name, ValueTy value) { + return impl::Attr({impl::AttrLiteralHelper({name, value})}); +} + // Matches a node with inputs `inputs`. // // `inputs` are ordered; `inputs`[i] must match input i. @@ -165,6 +195,15 @@ impl::NodeMatcherProperties Inputs(Ts... inputs) { return impl::Inputs({inputs...}); } +// Matches the `idx`'th output of a node that matches `node`.
+::testing::Matcher Out(int oidx, + ::testing::Matcher node); + +// Matches output 0 of a node matching `node`. `inline`: defined in a header. +inline ::testing::Matcher Out(::testing::Matcher node) { + return Out(0, node); +} + // Matches a node with control dependences `control_deps`. // // `control_deps` are unordered and will match the control deps of a node in any @@ -192,6 +231,9 @@ template // If `g` has a node named `name` returns it, otherwise returns null. Node* FindNodeByName(Graph* g, absl::string_view name); } // namespace testing + +void PrintTo(const Node* n, ::std::ostream* os); +void PrintTo(Node* n, ::std::ostream* os); } // namespace tensorflow #endif // TENSORFLOW_COMPILER_JIT_NODE_MATCHERS_H_ diff --git a/tensorflow/compiler/jit/node_matchers_test.cc b/tensorflow/compiler/jit/node_matchers_test.cc index 93a8994307b38ac240c22d0a18268638ac7620ae..c3f0dfece85573d71dbfa21eba5af70b674fe71e 100644 --- a/tensorflow/compiler/jit/node_matchers_test.cc +++ b/tensorflow/compiler/jit/node_matchers_test.cc @@ -18,6 +18,8 @@ limitations under the License.
#include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/ops/array_ops.h" #include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/control_flow_ops.h" +#include "tensorflow/cc/ops/control_flow_ops_internal.h" #include "tensorflow/cc/ops/math_ops.h" namespace tensorflow { @@ -27,12 +29,14 @@ namespace { using ::testing::_; using testing::matchers::AssignedDevice; +using testing::matchers::Attr; using testing::matchers::ConstantValue; using testing::matchers::CtrlDeps; using testing::matchers::Inputs; using testing::matchers::Name; using testing::matchers::NodeWith; using testing::matchers::Op; +using testing::matchers::Out; template string Explain(const T& t, const M& m) { @@ -61,7 +65,7 @@ TEST(NodeMatchers, CheckAgainstConstant) { "\nexpected op Add but found Placeholder"); EXPECT_EQ(Explain(placeholder.node(), NodeWith(Name("add"))), "\nexpected name add but found placeholder"); - EXPECT_EQ(Explain(placeholder.node(), NodeWith(Inputs(NodeWith()))), + EXPECT_EQ(Explain(placeholder.node(), NodeWith(Inputs(Out(NodeWith())))), "\nexpected 1 inputs but node has 0"); } @@ -74,18 +78,19 @@ TEST(NodeMatchers, CheckAgainstBinary) { ops::Placeholder(root.WithOpName("placeholder_b"), DT_FLOAT); Output add = ops::Add(root.WithOpName("add"), placeholder_a, placeholder_b); - EXPECT_THAT(add.node(), NodeWith(Op("Add"), Name("add"), - Inputs(NodeWith(Name("placeholder_a")), - NodeWith(Name("placeholder_b"))))); + EXPECT_THAT(add.node(), + NodeWith(Op("Add"), Name("add"), + Inputs(Out(NodeWith(Name("placeholder_a"))), + Out(NodeWith(Name("placeholder_b")))))); EXPECT_EQ(Explain(add.node(), NodeWith(Inputs())), "\nexpected 0 inputs but node has 2"); EXPECT_EQ( - Explain(add.node(), NodeWith(Inputs(NodeWith(Name("blah")), _))), + Explain(add.node(), NodeWith(Inputs(Out(NodeWith(Name("blah"))), _))), "\ninput 0 does not match expected:\nname: blah, \nsource does not match " "expected name: blah\n\t\nexpected name blah but found placeholder_a"); EXPECT_EQ( - 
Explain(add.node(), NodeWith(Inputs(_, NodeWith(Name("blah"))))), + Explain(add.node(), NodeWith(Inputs(_, Out(NodeWith(Name("blah")))))), "\ninput 1 does not match expected:\nname: blah, \nsource does not match " "expected name: blah\n\t\nexpected name blah but found placeholder_b"); } @@ -174,6 +179,36 @@ TEST(NodeMatchers, AssignedDevice) { "/job:localhost/replica:0/task:0/device:CPU:0 but found \"\""); } +TEST(NodeMatchers, OutputIndices) { + Scope root = Scope::NewRootScope().ExitOnError(); + Output pred = ops::Placeholder(root.WithOpName("pred"), DT_BOOL); + + Output data = ops::Placeholder(root.WithOpName("data"), DT_FLOAT); + ops::Switch sw(root.WithOpName("switch"), data, pred); + Output add = ops::Add(root.WithOpName("add"), sw.output_true, + ops::Placeholder(root.WithOpName("addend"), DT_FLOAT)); + + EXPECT_THAT(add.node(), NodeWith(Inputs(Out(1, NodeWith(Op("Switch"))), _))); + EXPECT_EQ( + Explain(add.node(), NodeWith(Inputs(Out(0, NodeWith(Op("Switch"))), _))), + "\ninput 0 does not match expected:\nop: Switch, \nexpected output slot " + "to be 0 but found 1"); +} + +TEST(NodeMatchers, Attrs) { + Scope root = Scope::NewRootScope().ExitOnError(); + Output enter = ops::internal::Enter( + root.WithOpName("enter"), + ops::Placeholder(root.WithOpName("data"), DT_FLOAT), "frame_name", + ops::internal::Enter::Attrs{}.IsConstant(true)); + EXPECT_THAT(enter.node(), NodeWith(Attr("is_constant", true))); + EXPECT_EQ(Explain(enter.node(), NodeWith(Attr("is_constant", false))), + "attribute named is_constant does not match value; expected: " + "\"false\", found: \"true\""); + EXPECT_EQ(Explain(enter.node(), NodeWith(Attr("missing_attr", false))), + "did not find attribute named \"missing_attr\" in node"); +} + } // namespace } // namespace testing } // namespace tensorflow diff --git a/tensorflow/compiler/jit/ops/xla_ops.cc b/tensorflow/compiler/jit/ops/xla_ops.cc index bcd1a29b1ff789b5674a21ff66cc6d23a809afc5..95d12e95fd9a0d1cca513ee74a0651ea69eba89e 100644 --- 
a/tensorflow/compiler/jit/ops/xla_ops.cc +++ b/tensorflow/compiler/jit/ops/xla_ops.cc @@ -54,6 +54,7 @@ REGISTER_OP("XlaClusterOutput") REGISTER_OP("_XlaCompile") .Input("constants: Tconstants") .Attr("Tconstants: list(type) >= 0") + .Attr("must_compile: bool") .Input("args: Targs") .Attr("Targs: list(type) >= 0") .Input("resources: Nresources * resource") @@ -71,8 +72,12 @@ that _XlaRun can use to look up the LocalExecutable and execute it. key: A key that can be used to look up the local executable compiled by the node and associated metadata. -compilation_successful: True iff the compilation was successful. Always true -for now. +compilation_successful: If the `must_compile` attr is false the _XlaCompile op + can decide not to compile the clusters based on some profitability + heuristics. In that case `compilation_successful` is false if _XlaCompile + chose not to compile the cluster. If the `must_compile` attr is true then + _XlaCompile always attempts to compile the cluster and + `compilation_successful` is always true. )"); REGISTER_OP("_XlaRun") diff --git a/tensorflow/compiler/jit/partially_decluster_pass.cc b/tensorflow/compiler/jit/partially_decluster_pass.cc index b1f9e9088f391cb8813d2c82395ffcc0b2081cae..5b9610322336acbcede0bef0538043b8ff917c16 100644 --- a/tensorflow/compiler/jit/partially_decluster_pass.cc +++ b/tensorflow/compiler/jit/partially_decluster_pass.cc @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/framework/memory_types.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/op_kernel.h" namespace tensorflow { namespace { @@ -206,18 +207,27 @@ bool IsIntraClusterEdge(const Edge& edge) { return src_cluster_name.has_value() && src_cluster_name == dst_cluster_name; } -Status MustCompileNode(const Node* n, bool* result) { +bool IsMustCompileDevice(const DeviceType& device_type) { + const XlaOpRegistry::DeviceRegistration* registration; + if (XlaOpRegistry::GetCompilationDevice(device_type.type(), ®istration)) { + return registration->requires_compilation; + } + + return false; +} + +Status MustCompileNode(const Node* n, bool* must_compile) { DeviceType device_type(""); TF_RETURN_IF_ERROR( DeviceToDeviceType(n->assigned_device_name(), &device_type)); - const XlaOpRegistry::DeviceRegistration* registration; - if (!XlaOpRegistry::GetCompilationDevice(device_type.type(), ®istration)) { - *result = false; - } else { - *result = registration->requires_compilation; + if (IsMustCompileDevice(device_type)) { + *must_compile = true; + return Status::OK(); } + // We must compile `n` if it does not have a TensorFlow kernel. + *must_compile = !FindKernelDef(device_type, n->def(), nullptr, nullptr).ok(); return Status::OK(); } diff --git a/tensorflow/compiler/jit/partially_decluster_pass_test.cc b/tensorflow/compiler/jit/partially_decluster_pass_test.cc index 0feb73a89e7050e8c413e5a733da1d87775b0ba3..74d5ef57184197ad6e9e5048722e84863756a3f5 100644 --- a/tensorflow/compiler/jit/partially_decluster_pass_test.cc +++ b/tensorflow/compiler/jit/partially_decluster_pass_test.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/jit/xla_cluster_util.h" +#include "tensorflow/compiler/tf2xla/cc/ops/xla_ops.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" #include "tensorflow/core/framework/node_def_util.h" @@ -405,5 +406,36 @@ TEST(PartiallyDeclusterPassTest, DontDeclusterXlaDeviceOps) { } } +TEST(PartiallyDeclusterPassTest, DontDeclusterNonTensorFlowOps) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output dynamic_slice_operand = + ops::Placeholder(s.WithOpName("dynamic_slice_operand"), DT_INT32, + ops::Placeholder::Attrs{}); + Output dynamic_slice_begin = ops::Placeholder( + s.WithOpName("dynamic_slice_begin"), DT_INT32, ops::Placeholder::Attrs{}); + Output dynamic_slice_size = ops::Placeholder( + s.WithOpName("dynamic_slice_size"), DT_INT32, ops::Placeholder::Attrs{}); + Output dynamic_slice = + ops::XlaDynamicSlice(s.WithOpName("dynamic_slice"), dynamic_slice_operand, + dynamic_slice_begin, dynamic_slice_size); + + Output reshape_input = ops::Placeholder(s.WithOpName("reshape_input"), + DT_FLOAT, ops::Placeholder::Attrs{}); + Output reshape = + ops::Reshape(s.WithOpName("reshape"), reshape_input, dynamic_slice); + + AddToCluster({dynamic_slice.node(), reshape.node()}, "cluster_0"); + + std::unique_ptr graph = absl::make_unique(OpRegistry::Global()); + TF_ASSERT_OK(s.ToGraph(graph.get())); + + Node* n = FindNodeByName(*graph, "dynamic_slice"); + ASSERT_NE(n, nullptr); + + TF_ASSERT_OK(PartiallyDecluster(&graph)); + + EXPECT_EQ(GetXlaClusterForNode(*n), "cluster_0"); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc index 0471995015bb080016b523305c90a3e42163a039..826e98b96620165604594a22b81cd02422605c12 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.cc +++ 
b/tensorflow/compiler/jit/xla_compilation_cache.cc @@ -40,6 +40,7 @@ namespace tensorflow { XlaCompilationCache::XlaCompilationCache(xla::LocalClient* client, DeviceType device_type) : client_(client), device_type_(std::move(device_type)) {} + XlaCompilationCache::~XlaCompilationCache() { // Ensure any use of our programs have completed by waiting for all stream // executors to complete. @@ -229,10 +230,15 @@ Status XlaCompilationCache::Compile( const std::map& constant_args, const std::map& variable_args, OpKernelContext* ctx, const XlaCompiler::CompileOptions& compile_options, + CompileMode compile_mode, const XlaCompiler::CompilationResult** out_compilation_result, xla::LocalExecutable** out_executable) { + // Set the compile threshold to 1 to implement CompileMode::kStrict. + int64 compile_threshold = + compile_mode == CompileMode::kLazy ? kDefaultCompilationThreshold : 1; return CompileImpl(options, function, constant_args, variable_args, ctx, compile_options, /*compile_single_op=*/false, + /*compile_threshold=*/compile_threshold, out_compilation_result, out_executable); } @@ -247,9 +253,10 @@ Status XlaCompilationCache::CompileSingleOp( NameAttrList name; name.set_name(def.op()); *name.mutable_attr() = def.attr(); - return CompileImpl( - options, name, constant_args, variable_args, ctx, compile_options, - /*compile_single_op=*/true, out_compilation_result, out_executable); + return CompileImpl(options, name, constant_args, variable_args, ctx, + compile_options, + /*compile_single_op=*/true, /*compile_threshold=*/1, + out_compilation_result, out_executable); } Status XlaCompilationCache::CompileImpl( @@ -257,6 +264,7 @@ Status XlaCompilationCache::CompileImpl( const std::map& constant_args, const std::map& variable_args, OpKernelContext* ctx, const XlaCompiler::CompileOptions& compile_options, bool compile_single_op, + int64 compile_threshold, const XlaCompiler::CompilationResult** out_compilation_result, xla::LocalExecutable** out_executable) { 
DCHECK_NE(out_executable, nullptr); @@ -310,9 +318,18 @@ Status XlaCompilationCache::CompileImpl( // TODO(phawkins): this locking will need to be restructured when we implement // cache eviction. mutex_lock entry_lock(entry->mu); + int64 current_request_count = ++entry->request_count; if (!entry->compiled) { VLOG(2) << "Compilation cache miss for signature: " - << SignatureDebugString(signature); + << SignatureDebugString(signature) << " with request count " + << current_request_count << " and compile threshold " + << compile_threshold; + if (current_request_count < compile_threshold) { + *out_compilation_result = nullptr; + *out_executable = nullptr; + return Status::OK(); + } + tensorflow::Env* env = tensorflow::Env::Default(); const uint64 compile_start_us = env->NowMicros(); // Do the actual JIT compilation without holding the lock (it can take diff --git a/tensorflow/compiler/jit/xla_compilation_cache.h b/tensorflow/compiler/jit/xla_compilation_cache.h index 75c7758f730f9f2f8251c02e7fac1a01f8cc9c2b..f06a991818db53adb3e5c0cc483c6180128a87e7 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.h +++ b/tensorflow/compiler/jit/xla_compilation_cache.h @@ -50,6 +50,11 @@ class XlaCompilationCache : public ResourceBase { XlaCompilationCache(xla::LocalClient* client, DeviceType device_type); ~XlaCompilationCache() override; + enum class CompileMode { + kLazy, + kStrict, + }; + // Compiles a function into a XlaCompiler::CompilationResult that can be used // to execute an XLA Computation. Compilation results are cached. // `function` is the name of a Tensorflow function to compile. @@ -58,6 +63,14 @@ class XlaCompilationCache : public ResourceBase { // `variable_args` is a snapshot of the current values of the // resource variable arguments to `function`; uninitialized variables are // represented by an absent OptionalTensor. + // + // `compile_mode` controls the behavior of the compilation cache on a cache + // miss. 
If `compile_mode` is `kLazy` then, based on some profitability + // heuristics, the compilation cache may decide not to compile the cluster at + // this time. In this case it returns null into both `out_compilation_result` + // and `out_executable`. If `compile_mode` is `kStrict` then the compilation + // cache always attempts the compilation on a cache miss. + // // The result of compilation is written to `*compilation_result`, which must // be non-null. If `executable` is non-null, also builds an // xla::LocalExecutable and sets `executable` to point to it. The resulting @@ -69,6 +82,7 @@ class XlaCompilationCache : public ResourceBase { const std::map& variable_args, OpKernelContext* ctx, const XlaCompiler::CompileOptions& compile_options, + CompileMode compile_mode, const XlaCompiler::CompilationResult** out_compilation_result, xla::LocalExecutable** out_executable); @@ -94,7 +108,7 @@ class XlaCompilationCache : public ResourceBase { const std::map& constant_args, const std::map& variable_args, OpKernelContext* ctx, const XlaCompiler::CompileOptions& compile_options, - bool compile_single_op, + bool compile_single_op, int64 compile_threshold, const XlaCompiler::CompilationResult** out_compilation_result, xla::LocalExecutable** out_executable); @@ -139,6 +153,9 @@ class XlaCompilationCache : public ResourceBase { // Have we tried compiling this entry? bool compiled = false; + // The number of times a compilation with this signature has been requested. + int64 request_count = 0; + // Did compilation succeed? Status compilation_status GUARDED_BY(mu); @@ -167,6 +184,10 @@ class XlaCompilationCache : public ResourceBase { absl::flat_hash_map compile_stats_ GUARDED_BY(compile_stats_mu_); + // The number of times a lazy compilation must be requested for a specific + // signature before we attempt to compile it. 
+ static constexpr int64 kDefaultCompilationThreshold = 2; + TF_DISALLOW_COPY_AND_ASSIGN(XlaCompilationCache); }; diff --git a/tensorflow/compiler/jit/xla_device_ops.h b/tensorflow/compiler/jit/xla_device_ops.h index 6967ad1f03fb5dd962d5b41f0c7ab1dfa42fab94..14a232b7a8a41f6b4401b2f9de58623af9b1205e 100644 --- a/tensorflow/compiler/jit/xla_device_ops.h +++ b/tensorflow/compiler/jit/xla_device_ops.h @@ -65,11 +65,13 @@ class XlaAssignVariableOp : public AsyncOpKernel { .HostMemory("resources"), \ KERNEL); -#define REGISTER_XLA_COMPILE_KERNEL(DEVICE, KERNEL, TYPES) \ - REGISTER_KERNEL_BUILDER(Name("_XlaCompile") \ - .Device(DEVICE) \ - .HostMemory("constants") \ - .HostMemory("resources"), \ +#define REGISTER_XLA_COMPILE_KERNEL(DEVICE, KERNEL, TYPES) \ + REGISTER_KERNEL_BUILDER(Name("_XlaCompile") \ + .Device(DEVICE) \ + .HostMemory("constants") \ + .HostMemory("key") \ + .HostMemory("compilation_successful") \ + .HostMemory("resources"), \ KERNEL); #define REGISTER_XLA_RUN_KERNEL(DEVICE, KERNEL, TYPES) \ diff --git a/tensorflow/compiler/jit/xla_interpreter_device.cc b/tensorflow/compiler/jit/xla_interpreter_device.cc index 19e681af0c940023de2ce82b3b337babe2f3dd5a..8a80639b6391ba9b73fe3143df8f6e44505cec2c 100644 --- a/tensorflow/compiler/jit/xla_interpreter_device.cc +++ b/tensorflow/compiler/jit/xla_interpreter_device.cc @@ -25,8 +25,9 @@ namespace tensorflow { const char* const DEVICE_XLA_INTERPRETER = "XLA_INTERPRETER"; const char* const DEVICE_INTERPRETER_XLA_JIT = "XLA_INTERPRETER_JIT"; -constexpr std::array kExecAllTypes = { - {DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}}; +constexpr std::array kExecAllTypes = { + {DT_INT8, DT_INT32, DT_INT64, DT_HALF, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, + DT_BOOL, DT_BFLOAT16}}; class XlaInterpreterDeviceFactory : public DeviceFactory { public: diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index ba2401ed2628beeba2be3bf59a067c3d87ca3f9f..a8a9f39e10620499237c77883925a0223298a2b4 
100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -1089,6 +1089,7 @@ cuda_py_test( size = "medium", srcs = ["jit_test.py"], additional_deps = [ + ":test_utils", "//tensorflow/contrib/compiler:compiler_py", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", @@ -1107,6 +1108,7 @@ cuda_py_test( size = "small", srcs = ["dense_layer_test.py"], additional_deps = [ + ":test_utils", "//tensorflow/contrib/compiler:compiler_py", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", diff --git a/tensorflow/compiler/tests/dense_layer_test.py b/tensorflow/compiler/tests/dense_layer_test.py index 9390870e07d6b5bd90dbc5c04bac0946595dcf7f..d1b90f098d7d6574999ba0af44b285f5ad5e4f8d 100644 --- a/tensorflow/compiler/tests/dense_layer_test.py +++ b/tensorflow/compiler/tests/dense_layer_test.py @@ -21,6 +21,7 @@ from __future__ import print_function import os import numpy as np +from tensorflow.compiler.tests import test_utils from tensorflow.contrib.compiler import jit from tensorflow.core.protobuf import config_pb2 from tensorflow.python.layers import layers @@ -30,7 +31,6 @@ from tensorflow.python.platform import test jit_scope = jit.experimental_jit_scope - def GetRunMetadataLabels(run_metadata): """Returns all labels in run_metadata.""" labels = [] @@ -68,13 +68,14 @@ class DenseLayerTest(test.TestCase): config.graph_options.optimizer_options.global_jit_level = ( config_pb2.OptimizerOptions.ON_1) - with self.test_session(config=config) as sess: + with self.session(config=config) as sess: x = array_ops.placeholder(shape=[None, None, 3], dtype=np.float32) y = layers.dense(x, 3) sess.run(variables.initialize_all_variables()) run_metadata = config_pb2.RunMetadata() - sess.run( + test_utils.RunWithWarmup( + sess, y, {x: np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])}, run_metadata=run_metadata, options=config_pb2.RunOptions( @@ -98,7 +99,8 @@ class DenseLayerTest(test.TestCase): 
sess.run(variables.initialize_all_variables()) run_metadata = config_pb2.RunMetadata() - sess.run( + test_utils.RunWithWarmup( + sess, y, {x: np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])}, run_metadata=run_metadata, options=config_pb2.RunOptions( @@ -126,7 +128,8 @@ class DenseLayerTest(test.TestCase): sess.run(variables.initialize_all_variables()) run_metadata = config_pb2.RunMetadata() - sess.run( + test_utils.RunWithWarmup( + sess, y, {x: np.array([[[1, 2, 3], [4, 5, 6]], [[1, 2, 3], [4, 5, 6]]])}, run_metadata=run_metadata, options=config_pb2.RunOptions( @@ -138,4 +141,6 @@ class DenseLayerTest(test.TestCase): if __name__ == "__main__": + os.environ["TF_XLA_FLAGS"] = ("--tf_xla_enable_lazy_compilation=true " + + os.environ.get("TF_XLA_FLAGS", "")) test.main() diff --git a/tensorflow/compiler/tests/image_ops_test.py b/tensorflow/compiler/tests/image_ops_test.py index 68fdb5caf4c2a496b5058cdda40ca650484a6e0e..d67b16f8e9e7320d5717b0203be340a2356e53d0 100644 --- a/tensorflow/compiler/tests/image_ops_test.py +++ b/tensorflow/compiler/tests/image_ops_test.py @@ -26,7 +26,6 @@ import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.compiler.tests import xla_test -from tensorflow.python.compat import compat from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -605,168 +604,205 @@ class ResizeBilinearTest(xla_test.XLATestCase): class NonMaxSuppressionTest(xla_test.XLATestCase): def testNMS128From1024(self): - with compat.forward_compatibility_horizon(2018, 8, 8): - num_boxes = 1024 - boxes_np = np.random.normal(50, 10, (num_boxes, 4)).astype("f4") - scores_np = np.random.normal(0.5, 0.1, (num_boxes,)).astype("f4") - - max_output_size = 128 - iou_threshold_np = np.array(0.5, dtype=np.float32) - score_threshold_np = np.array(0.0, dtype=np.float32) - - with self.cached_session() as sess: - boxes = 
array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape) - scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape) - iou_threshold = array_ops.placeholder(iou_threshold_np.dtype, - iou_threshold_np.shape) - score_threshold = array_ops.placeholder(score_threshold_np.dtype, - score_threshold_np.shape) - with self.test_scope(): - selected_indices = image_ops.non_max_suppression_padded( - boxes=boxes, - scores=scores, - max_output_size=max_output_size, - iou_threshold=iou_threshold, - score_threshold=score_threshold, - pad_to_max_output_size=True) - inputs_feed = { - boxes: boxes_np, - scores: scores_np, - score_threshold: score_threshold_np, - iou_threshold: iou_threshold_np - } - (indices_tf, _) = sess.run(selected_indices, feed_dict=inputs_feed) - - self.assertEqual(indices_tf.size, max_output_size) + num_boxes = 1024 + boxes_np = np.random.normal(50, 10, (num_boxes, 4)).astype("f4") + scores_np = np.random.normal(0.5, 0.1, (num_boxes,)).astype("f4") + + max_output_size = 128 + iou_threshold_np = np.array(0.5, dtype=np.float32) + score_threshold_np = np.array(0.0, dtype=np.float32) + + with self.cached_session() as sess: + boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape) + scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape) + iou_threshold = array_ops.placeholder(iou_threshold_np.dtype, + iou_threshold_np.shape) + score_threshold = array_ops.placeholder(score_threshold_np.dtype, + score_threshold_np.shape) + with self.test_scope(): + selected_indices = image_ops.non_max_suppression_padded( + boxes=boxes, + scores=scores, + max_output_size=max_output_size, + iou_threshold=iou_threshold, + score_threshold=score_threshold, + pad_to_max_output_size=True) + inputs_feed = { + boxes: boxes_np, + scores: scores_np, + score_threshold: score_threshold_np, + iou_threshold: iou_threshold_np + } + (indices_tf, _) = sess.run(selected_indices, feed_dict=inputs_feed) + + self.assertEqual(indices_tf.size, max_output_size) def 
testNMS3From6Boxes(self): - with compat.forward_compatibility_horizon(2018, 8, 8): - # Three boxes are selected based on IOU. - boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], - [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]] - boxes_np = np.array(boxes_data, dtype=np.float32) - - scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3] - scores_np = np.array(scores_data, dtype=np.float32) - - max_output_size = 3 - iou_threshold_np = np.array(0.5, dtype=np.float32) - score_threshold_np = np.array(0.0, dtype=np.float32) - - with self.cached_session() as sess: - boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape) - scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape) - iou_threshold = array_ops.placeholder(iou_threshold_np.dtype, - iou_threshold_np.shape) - score_threshold = array_ops.placeholder(score_threshold_np.dtype, - score_threshold_np.shape) - with self.test_scope(): - selected_indices = image_ops.non_max_suppression_padded( - boxes=boxes, - scores=scores, - max_output_size=max_output_size, - iou_threshold=iou_threshold, - score_threshold=score_threshold, - pad_to_max_output_size=True) - inputs_feed = { - boxes: boxes_np, - scores: scores_np, - score_threshold: score_threshold_np, - iou_threshold: iou_threshold_np - } - (indices_tf, num_valid) = sess.run( - selected_indices, feed_dict=inputs_feed) - - self.assertEqual(indices_tf.size, max_output_size) - self.assertEqual(num_valid, 3) - self.assertAllClose(indices_tf[:num_valid], [3, 0, 5]) + # Three boxes are selected based on IOU. 
+ boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], + [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]] + boxes_np = np.array(boxes_data, dtype=np.float32) + + scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3] + scores_np = np.array(scores_data, dtype=np.float32) + + max_output_size = 3 + iou_threshold_np = np.array(0.5, dtype=np.float32) + score_threshold_np = np.array(0.0, dtype=np.float32) + + with self.cached_session() as sess: + boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape) + scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape) + iou_threshold = array_ops.placeholder(iou_threshold_np.dtype, + iou_threshold_np.shape) + score_threshold = array_ops.placeholder(score_threshold_np.dtype, + score_threshold_np.shape) + with self.test_scope(): + selected_indices = image_ops.non_max_suppression_padded( + boxes=boxes, + scores=scores, + max_output_size=max_output_size, + iou_threshold=iou_threshold, + score_threshold=score_threshold, + pad_to_max_output_size=True) + inputs_feed = { + boxes: boxes_np, + scores: scores_np, + score_threshold: score_threshold_np, + iou_threshold: iou_threshold_np + } + (indices_tf, num_valid) = sess.run( + selected_indices, feed_dict=inputs_feed) + + self.assertEqual(indices_tf.size, max_output_size) + self.assertEqual(num_valid, 3) + self.assertAllClose(indices_tf[:num_valid], [3, 0, 5]) def testNMS3Then2WithScoreThresh(self): # Three boxes are selected based on IOU. # One is filtered out by score threshold. 
- with compat.forward_compatibility_horizon(2018, 8, 8): - boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], - [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]] - boxes_np = np.array(boxes_data, dtype=np.float32) - - scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3] - scores_np = np.array(scores_data, dtype=np.float32) - max_output_size = 3 - iou_threshold_np = np.array(0.5, dtype=np.float32) - score_threshold_np = np.array(0.4, dtype=np.float32) - - with self.cached_session() as sess: - boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape) - scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape) - iou_threshold = array_ops.placeholder(iou_threshold_np.dtype, - iou_threshold_np.shape) - score_threshold = array_ops.placeholder(score_threshold_np.dtype, - score_threshold_np.shape) - with self.test_scope(): - selected_indices = image_ops.non_max_suppression_padded( - boxes=boxes, - scores=scores, - max_output_size=max_output_size, - iou_threshold=iou_threshold, - score_threshold=score_threshold, - pad_to_max_output_size=True) - inputs_feed = { - boxes: boxes_np, - scores: scores_np, - iou_threshold: iou_threshold_np, - score_threshold: score_threshold_np - } - (indices_tf, num_valid) = sess.run( - selected_indices, feed_dict=inputs_feed) - - self.assertEqual(indices_tf.size, max_output_size) - self.assertEqual(num_valid, 2) - self.assertAllClose(indices_tf[:num_valid], [3, 0]) + boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], + [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]] + boxes_np = np.array(boxes_data, dtype=np.float32) + + scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3] + scores_np = np.array(scores_data, dtype=np.float32) + max_output_size = 3 + iou_threshold_np = np.array(0.5, dtype=np.float32) + score_threshold_np = np.array(0.4, dtype=np.float32) + + with self.cached_session() as sess: + boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape) + scores = 
array_ops.placeholder(scores_np.dtype, shape=scores_np.shape) + iou_threshold = array_ops.placeholder(iou_threshold_np.dtype, + iou_threshold_np.shape) + score_threshold = array_ops.placeholder(score_threshold_np.dtype, + score_threshold_np.shape) + with self.test_scope(): + selected_indices = image_ops.non_max_suppression_padded( + boxes=boxes, + scores=scores, + max_output_size=max_output_size, + iou_threshold=iou_threshold, + score_threshold=score_threshold, + pad_to_max_output_size=True) + inputs_feed = { + boxes: boxes_np, + scores: scores_np, + iou_threshold: iou_threshold_np, + score_threshold: score_threshold_np + } + (indices_tf, num_valid) = sess.run( + selected_indices, feed_dict=inputs_feed) + + self.assertEqual(indices_tf.size, max_output_size) + self.assertEqual(num_valid, 2) + self.assertAllClose(indices_tf[:num_valid], [3, 0]) def testNMS3Then1WithScoreMaxThresh(self): # Three boxes are selected based on IOU. # One is filtered out by score threshold. # One is filtered out by max_output_size. 
- with compat.forward_compatibility_horizon(2018, 8, 8): - boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], - [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]] - boxes_np = np.array(boxes_data, dtype=np.float32) - - scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3] - scores_np = np.array(scores_data, dtype=np.float32) - max_output_size = 1 - iou_threshold_np = np.array(0.5, dtype=np.float32) - score_threshold_np = np.array(0.4, dtype=np.float32) - - with self.cached_session() as sess: - boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape) - scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape) - iou_threshold = array_ops.placeholder(iou_threshold_np.dtype, - iou_threshold_np.shape) - score_threshold = array_ops.placeholder(score_threshold_np.dtype, - score_threshold_np.shape) - with self.test_scope(): - selected_indices = image_ops.non_max_suppression_padded( - boxes=boxes, - scores=scores, - max_output_size=max_output_size, - iou_threshold=iou_threshold, - score_threshold=score_threshold, - pad_to_max_output_size=True) - inputs_feed = { - boxes: boxes_np, - scores: scores_np, - iou_threshold: iou_threshold_np, - score_threshold: score_threshold_np - } - (indices_tf, num_valid) = sess.run( - selected_indices, feed_dict=inputs_feed) - - self.assertEqual(indices_tf.size, max_output_size) - self.assertEqual(num_valid, 1) - self.assertAllClose(indices_tf[:num_valid], [3]) + boxes_data = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], + [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]] + boxes_np = np.array(boxes_data, dtype=np.float32) + + scores_data = [0.9, 0.75, 0.6, 0.95, 0.5, 0.3] + scores_np = np.array(scores_data, dtype=np.float32) + max_output_size = 1 + iou_threshold_np = np.array(0.5, dtype=np.float32) + score_threshold_np = np.array(0.4, dtype=np.float32) + + with self.cached_session() as sess: + boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape) + scores = 
array_ops.placeholder(scores_np.dtype, shape=scores_np.shape) + iou_threshold = array_ops.placeholder(iou_threshold_np.dtype, + iou_threshold_np.shape) + score_threshold = array_ops.placeholder(score_threshold_np.dtype, + score_threshold_np.shape) + with self.test_scope(): + selected_indices = image_ops.non_max_suppression_padded( + boxes=boxes, + scores=scores, + max_output_size=max_output_size, + iou_threshold=iou_threshold, + score_threshold=score_threshold, + pad_to_max_output_size=True) + inputs_feed = { + boxes: boxes_np, + scores: scores_np, + iou_threshold: iou_threshold_np, + score_threshold: score_threshold_np + } + (indices_tf, num_valid) = sess.run( + selected_indices, feed_dict=inputs_feed) + + self.assertEqual(indices_tf.size, max_output_size) + self.assertEqual(num_valid, 1) + self.assertAllClose(indices_tf[:num_valid], [3]) + + def testSelectFromContinuousOverLap(self): + # Tests that a suppressed box does not itself suppress other boxes. + + boxes_data = [[0, 0, 1, 1], [0, 0.2, 1, 1.2], [0, 0.4, 1, 1.4], + [0, 0.6, 1, 1.6], [0, 0.8, 1, 1.8], [0, 2, 1, 3]] + boxes_np = np.array(boxes_data, dtype=np.float32) + + scores_data = [0.9, 0.75, 0.6, 0.5, 0.4, 0.3] + scores_np = np.array(scores_data, dtype=np.float32) + max_output_size = 3 + iou_threshold_np = np.array(0.5, dtype=np.float32) + score_threshold_np = np.array(0.1, dtype=np.float32) + + with self.cached_session() as sess: + boxes = array_ops.placeholder(boxes_np.dtype, shape=boxes_np.shape) + scores = array_ops.placeholder(scores_np.dtype, shape=scores_np.shape) + iou_threshold = array_ops.placeholder(iou_threshold_np.dtype, + iou_threshold_np.shape) + score_threshold = array_ops.placeholder(score_threshold_np.dtype, + score_threshold_np.shape) + with self.test_scope(): + selected_indices = image_ops.non_max_suppression_padded( + boxes=boxes, + scores=scores, + max_output_size=max_output_size, + iou_threshold=iou_threshold, + score_threshold=score_threshold, + pad_to_max_output_size=True) + 
inputs_feed = { + boxes: boxes_np, + scores: scores_np, + iou_threshold: iou_threshold_np, + score_threshold: score_threshold_np + } + (indices_tf, num_valid) = sess.run( + selected_indices, feed_dict=inputs_feed) + + self.assertEqual(indices_tf.size, max_output_size) + self.assertEqual(num_valid, 3) + self.assertAllClose(indices_tf[:num_valid], [0, 2, 4]) if __name__ == "__main__": test.main() diff --git a/tensorflow/compiler/tests/jit_test.py b/tensorflow/compiler/tests/jit_test.py index de68ff0e32cd59e65094c0b7319f8ab213eed4db..8778b54dfaf35003c83cf2ab03e9e218c60c98ed 100644 --- a/tensorflow/compiler/tests/jit_test.py +++ b/tensorflow/compiler/tests/jit_test.py @@ -21,6 +21,7 @@ from __future__ import print_function import os import numpy as np +from tensorflow.compiler.tests import test_utils from tensorflow.contrib.compiler import jit from tensorflow.core.protobuf import config_pb2 from tensorflow.core.protobuf import rewriter_config_pb2 @@ -36,8 +37,8 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.platform import test -jit_scope = jit.experimental_jit_scope +jit_scope = jit.experimental_jit_scope # Disable rewrites to make sure we don't end up having to update this test # whenever we implement new ones. @@ -77,11 +78,11 @@ def InLabels(labels, substr): return any([substr in x for x in labels]) -def MetadataHasXlaOp(run_metadata): +def MetadataHasXlaRunOp(run_metadata): """Returns true if there are XlaRun kernels in run_metadata's timeline.""" # TODO(phawkins): find a less hacky way to test whether a kernel ran. 
- return InLabels(RunMetadataLabels(run_metadata), "XlaRun") + return InLabels(RunMetadataLabels(run_metadata), "_XlaRun") class JitLaunchTest(test.TestCase): @@ -108,15 +109,14 @@ class JitLaunchTest(test.TestCase): direct_op = fn(*placeholders) run_metadata = config_pb2.RunMetadata() - compiled = sess.run(compiled_op, - feeds, - run_metadata=run_metadata, - options=config_pb2.RunOptions( - trace_level=config_pb2.RunOptions.FULL_TRACE)) + compiled = test_utils.RunWithWarmup( + sess, compiled_op, feeds, + config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE), + run_metadata) print("Compiled Result {}".format(compiled)) if require_kernel_launch: - self.assert_(MetadataHasXlaOp(run_metadata)) + self.assert_(MetadataHasXlaRunOp(run_metadata)) direct = sess.run(direct_op, feeds) print("Direct Result {}".format(direct)) @@ -137,7 +137,7 @@ class JitLaunchTest(test.TestCase): a = constant_op.constant(100) # pylint: disable=unused-variable call = KernelWithNoOutputs() # pylint: disable=assignment-from-no-return - sess.run(call, {}) + test_utils.RunWithWarmup(sess, call, {}) def testAliasing(self): """Regression test for compiled functions that return an aliased buffer. @@ -250,17 +250,21 @@ class JitLaunchTest(test.TestCase): dx = np.random.random_sample((batch_size, image_size)).astype(np.float32) with session_lib.Session() as sess: run_metadata = config_pb2.RunMetadata() - output = sess.run(y, {x: dx, - w: dw, - b: db}, - run_metadata=run_metadata, - options=config_pb2.RunOptions( - trace_level=config_pb2.RunOptions.FULL_TRACE)) + output = test_utils.RunWithWarmup( + sess, + y, { + x: dx, + w: dw, + b: db + }, + run_metadata=run_metadata, + options=config_pb2.RunOptions( + trace_level=config_pb2.RunOptions.FULL_TRACE)) # TODO(phawkins): really we would like to test that there were exactly # two kernel launches. However, we have no reliable way to determine # that. 
- self.assert_(MetadataHasXlaOp(run_metadata)) + self.assert_(MetadataHasXlaRunOp(run_metadata)) expected = np.square(np.dot(dx, dw) + db) self.assertAllClose(expected, output, rtol=1e-1) @@ -272,7 +276,7 @@ class XlaCompilationTest(test.TestCase): def testReshape(self): """Tests an operator with compile-time constant and non-constant inputs.""" - with self.test_session(config=NoRewriteSessionConfig()) as sess: + with self.session(config=NoRewriteSessionConfig()) as sess: x = array_ops.placeholder(dtypes.float32) y = array_ops.placeholder(dtypes.int32) with jit_scope(): @@ -284,19 +288,22 @@ class XlaCompilationTest(test.TestCase): # statically known as part of the JIT compilation's input graph. z = array_ops.reshape(x, y) run_metadata = config_pb2.RunMetadata() - out = sess.run(z, - {x: np.array([1, 2, 3, 4, 5, 6], np.float32), - y: [-1, 3]}, - run_metadata=run_metadata, - options=config_pb2.RunOptions( - trace_level=config_pb2.RunOptions.FULL_TRACE)) - self.assert_(MetadataHasXlaOp(run_metadata)) + out = test_utils.RunWithWarmup( + sess, + z, { + x: np.array([1, 2, 3, 4, 5, 6], np.float32), + y: [-1, 3] + }, + run_metadata=run_metadata, + options=config_pb2.RunOptions( + trace_level=config_pb2.RunOptions.FULL_TRACE)) + self.assert_(MetadataHasXlaRunOp(run_metadata)) self.assertAllClose(np.array([[1, 2, 3], [4, 5, 6]], np.float32), out) def testIgnoredArguments(self): """Tests that JIT computations can ignore formal parameters.""" - with self.test_session(config=NoRewriteSessionConfig()) as sess: + with self.session(config=NoRewriteSessionConfig()) as sess: x = array_ops.placeholder(dtypes.int32) y = array_ops.placeholder(dtypes.int32) with jit_scope(): @@ -309,18 +316,22 @@ class XlaCompilationTest(test.TestCase): t = math_ops.add(z, z) run_metadata = config_pb2.RunMetadata() - out = sess.run(t, {x: np.int32(7), - y: np.int32(404)}, - run_metadata=run_metadata, - options=config_pb2.RunOptions( - trace_level=config_pb2.RunOptions.FULL_TRACE)) - 
self.assert_(MetadataHasXlaOp(run_metadata)) + out = test_utils.RunWithWarmup( + sess, + t, { + x: np.int32(7), + y: np.int32(404) + }, + run_metadata=run_metadata, + options=config_pb2.RunOptions( + trace_level=config_pb2.RunOptions.FULL_TRACE)) + self.assert_(MetadataHasXlaRunOp(run_metadata)) self.assertAllClose(28, out) def testLoops(self): """Tests that compilation accepts computations containing loops.""" - with self.test_session(config=NoRewriteSessionConfig()) as session: + with self.session(config=NoRewriteSessionConfig()) as session: x = array_ops.placeholder(dtypes.float32) with jit_scope(): c = lambda i, _: math_ops.less(i, 5) @@ -332,13 +343,13 @@ class XlaCompilationTest(test.TestCase): run_metadata=run_metadata, options=config_pb2.RunOptions( trace_level=config_pb2.RunOptions.FULL_TRACE)) - self.assert_(MetadataHasXlaOp(run_metadata)) + self.assert_(MetadataHasXlaRunOp(run_metadata)) self.assertAllClose(result, np.float32(95), rtol=1e-1) def testCond(self): """Tests that compilation handles switch operators.""" - with self.test_session(config=NoRewriteSessionConfig()) as session: + with self.session(config=NoRewriteSessionConfig()) as session: x = array_ops.placeholder(dtypes.float32) y = array_ops.placeholder(dtypes.float32) c = array_ops.placeholder(dtypes.bool) @@ -351,13 +362,17 @@ class XlaCompilationTest(test.TestCase): # deadlock. 
run_metadata = config_pb2.RunMetadata() - result = session.run(t, {x: np.float32(2), - y: np.float32(4), - c: True}, - run_metadata=run_metadata, - options=config_pb2.RunOptions( - trace_level=config_pb2.RunOptions.FULL_TRACE)) - self.assert_(MetadataHasXlaOp(run_metadata)) + result = test_utils.RunWithWarmup( + session, + t, { + x: np.float32(2), + y: np.float32(4), + c: True + }, + run_metadata=run_metadata, + options=config_pb2.RunOptions( + trace_level=config_pb2.RunOptions.FULL_TRACE)) + self.assert_(MetadataHasXlaRunOp(run_metadata)) self.assertAllClose(result, np.float32(6), rtol=1e-1) def testNestedFunction(self): @@ -379,7 +394,7 @@ class XlaCompilationTest(test.TestCase): inp = array_ops.placeholder(dtypes.float32) out = Entry(inp) - with self.test_session( + with self.session( config=NoRewriteSessionConfig(), graph=g, use_gpu=True) as sess: run_metadata = config_pb2.RunMetadata() val = sess.run(out, @@ -392,7 +407,7 @@ class XlaCompilationTest(test.TestCase): def testLoopDeadlock(self): """Regression test for bug that caused deadlocks in graphs with loops.""" - with self.test_session(config=NoRewriteSessionConfig()) as session: + with self.session(config=NoRewriteSessionConfig()) as session: x = array_ops.placeholder(dtypes.float32) with jit_scope(): y = x + 1.0 @@ -425,11 +440,13 @@ class XlaCompilationTest(test.TestCase): cfg.graph_options.optimizer_options.do_function_inlining = True with session_lib.Session(graph=g, config=cfg) as sess: run_metadata = config_pb2.RunMetadata() - dx_val = sess.run(dx, - feed_dict={x: 100.}, - run_metadata=run_metadata, - options=config_pb2.RunOptions( - trace_level=config_pb2.RunOptions.FULL_TRACE)) + dx_val = test_utils.RunWithWarmup( + sess, + dx, + feed_dict={x: 100.}, + run_metadata=run_metadata, + options=config_pb2.RunOptions( + trace_level=config_pb2.RunOptions.FULL_TRACE)) self.assertAllClose(dx_val, 0.01) return RunMetadataLabels(run_metadata) @@ -475,7 +492,8 @@ class ElementWiseFusionTest(test.TestCase): a7 
= a6 + a2 run_metadata = config_pb2.RunMetadata() - output = sess.run( + output = test_utils.RunWithWarmup( + sess, a7, { a1: arg0, a2: arg1 @@ -509,5 +527,60 @@ class ElementWiseFusionTest(test.TestCase): self.assertAllClose(tf_op, tfef_op, rtol=1e-1) +class LazyCompilationTest(test.TestCase): + + def testLazyCompilation(self): + + @function.Defun(compiled=True) + def CompiledFunction(x): + return math_ops.log(x) + + with session_lib.Session(config=NoRewriteSessionConfig()) as sess: + x = array_ops.placeholder(dtypes.float32) + y = CompiledFunction(x) + + run_metadata_before_warmup = config_pb2.RunMetadata() + sess.run( + y, + feed_dict={x: [2., 10.]}, + run_metadata=run_metadata_before_warmup, + options=config_pb2.RunOptions( + trace_level=config_pb2.RunOptions.FULL_TRACE)) + self.assertTrue( + InLabels( + RunMetadataLabels(run_metadata_before_warmup), "_XlaCompile")) + self.assertFalse( + InLabels(RunMetadataLabels(run_metadata_before_warmup), "_XlaRun")) + + # We compile when we see the same shape a second time. 
+ + run_metadata_after_warmup = config_pb2.RunMetadata() + sess.run( + y, + feed_dict={x: [2., 10.]}, + run_metadata=run_metadata_after_warmup, + options=config_pb2.RunOptions( + trace_level=config_pb2.RunOptions.FULL_TRACE)) + self.assertTrue( + InLabels(RunMetadataLabels(run_metadata_after_warmup), "_XlaCompile")) + self.assertTrue( + InLabels(RunMetadataLabels(run_metadata_after_warmup), "_XlaRun")) + + run_metadata_for_new_shape = config_pb2.RunMetadata() + sess.run( + y, + feed_dict={x: [2., 10., 12.]}, + run_metadata=run_metadata_for_new_shape, + options=config_pb2.RunOptions( + trace_level=config_pb2.RunOptions.FULL_TRACE)) + self.assertTrue( + InLabels( + RunMetadataLabels(run_metadata_for_new_shape), "_XlaCompile")) + self.assertFalse( + InLabels(RunMetadataLabels(run_metadata_for_new_shape), "_XlaRun")) + + if __name__ == "__main__": + os.environ["TF_XLA_FLAGS"] = ("--tf_xla_enable_lazy_compilation=true " + + os.environ.get("TF_XLA_FLAGS", "")) test.main() diff --git a/tensorflow/compiler/tests/sort_ops_test.py b/tensorflow/compiler/tests/sort_ops_test.py index 57f0ab7a9eae16ab3de61af9760dfba1ab355b46..3e499c2fb176a6d63fe3590e18a4a90e461e096a 100644 --- a/tensorflow/compiler/tests/sort_ops_test.py +++ b/tensorflow/compiler/tests/sort_ops_test.py @@ -57,10 +57,13 @@ class XlaSortOpTest(xla_test.XLATestCase): xla.sort, [x], expected=[np.arange(101, dtype=dtype)]) def testKeyValueSort(self): - supported_types = set( + supported_key_types = set( [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32]) - for key_type in supported_types.intersection(self.numeric_types): - for value_type in supported_types.intersection(self.numeric_types): + supported_value_types = set( + [dtypes.bfloat16.as_numpy_dtype, np.float32, np.int32, np.uint32, + dtypes.int64.as_numpy_dtype, dtypes.uint64.as_numpy_dtype]) + for key_type in supported_key_types.intersection(self.numeric_types): + for value_type in supported_value_types.intersection(self.numeric_types): x = 
np.arange(101, dtype=key_type) np.random.shuffle(x) y = (-x).astype(value_type) diff --git a/tensorflow/compiler/tests/tensor_array_ops_test.py b/tensorflow/compiler/tests/tensor_array_ops_test.py index 78244d0b366d9128a4c59f786e4c5ac12e743b75..46ca371c8abf1cb4710717a183ee12820c4c4ca0 100644 --- a/tensorflow/compiler/tests/tensor_array_ops_test.py +++ b/tensorflow/compiler/tests/tensor_array_ops_test.py @@ -920,6 +920,34 @@ class TensorArrayTest(xla_test.XLATestCase): def testTensorArrayEvalEmptyWithDefault(self): self._testTensorArrayEvalEmptyWithDefault() + def _testTensorArrayScatterRead(self, tf_dtype): + with self.cached_session() as session, self.test_scope(): + convert = _make_converter(tf_dtype) + + ta = tensor_array_ops.TensorArray( + dtype=tf_dtype, + tensor_array_name="foo", + size=10) + + indices = constant_op.constant([1, 8]) + value = constant_op.constant(convert([[1.0, -1.0], [10.0, -10.0]])) + id0 = array_ops.placeholder(dtypes.int32) + id1 = array_ops.placeholder(dtypes.int32) + + w = ta.scatter(indices, value) + r0 = w.read(id0) + r1 = w.read(id1) + + # Test aggregation of read + read_vals = session.run([r0, r1], feed_dict={id0: 1, id1: 8}) + self.assertAllEqual(convert([1.0, -1.0]), read_vals[0]) + self.assertAllEqual(convert([10.0, -10.0]), read_vals[1]) + + def testTensorArrayScatterRead(self): + for dtype in self.numeric_tf_types: + self._testTensorArrayScatterRead(dtype) + self._testTensorArrayScatterRead(dtypes.bool) + def testTensorArrayScatterReadAndGradients(self): with self.cached_session() as session, self.test_scope(): ta = tensor_array_ops.TensorArray( @@ -929,15 +957,18 @@ class TensorArrayTest(xla_test.XLATestCase): indices = constant_op.constant([1, 8]) value = constant_op.constant([[1.0, -1.0], [10.0, -10.0]]) + id0 = array_ops.placeholder(dtypes.int32) + id1 = array_ops.placeholder(dtypes.int32) w = ta.scatter(indices, value) - r0 = w.read(1) - r1 = w.read(8) + r0 = w.read(id0) + r1 = w.read(id1) # Test combined gradients + 
aggregation of read(0). grad = gradients_impl.gradients( ys=[r0, r1], xs=[value], grad_ys=[[2.0, 3.0], [4.0, 5.0]]) - read_vals, grad_vals = session.run([[r0, r1], grad]) + read_vals, grad_vals = session.run([[r0, r1], grad], + feed_dict={id0: 1, id1: 8}) self.assertEqual(len(read_vals), 2) self.assertEqual(len(grad_vals), 1) diff --git a/tensorflow/compiler/tests/test_utils.py b/tensorflow/compiler/tests/test_utils.py index 6abde18ea91f16d153a154b94effab037a911c6c..0e77dbf1a79d3dbacb77bab8b8e3df9bcc6287e1 100644 --- a/tensorflow/compiler/tests/test_utils.py +++ b/tensorflow/compiler/tests/test_utils.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin def ConvertBetweenDataFormats(x, data_format_src, data_format_dst): @@ -61,3 +62,14 @@ def PermuteDimsBetweenDataFormats(dims, data_format_src, data_format_dst): dim_map = {d: i for i, d in enumerate(data_format_src)} permuted_dims = [dims[dim_map[d]] for d in data_format_dst] return permuted_dims + + +_JIT_WARMUP_ITERATIONS = 10 + + +def RunWithWarmup(sess, op_to_run, feed_dict, options=None, run_metadata=None): + """Runs a graph a few times to ensure that its clusters are compiled.""" + for _ in xrange(0, _JIT_WARMUP_ITERATIONS): + sess.run(op_to_run, feed_dict, options=options) + return sess.run( + op_to_run, feed_dict, options=options, run_metadata=run_metadata) diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index 3f631f91ec442c149b3ea4df3826d98b0419a76f..f0e7791e9811533502fae0d4dea5a2e1ca2cf33c 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -308,6 +308,7 @@ tf_cc_test( "//tensorflow/cc:function_ops", "//tensorflow/cc:ops", "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:math_ops_op_lib", "//tensorflow/core:protos_all_cc", @@ -661,5 +662,6 @@ 
cc_library( hdrs = ["side_effect_util.h"], deps = [ "//tensorflow/core:core_cpu", + "@com_google_absl//absl/strings", ], ) diff --git a/tensorflow/compiler/tf2xla/functionalize_cond.cc b/tensorflow/compiler/tf2xla/functionalize_cond.cc index db256e577a1f3dd38e04d102f60182023b9d43b2..46649b8cc43016d4a62f49e20256c77ca8accc79 100644 --- a/tensorflow/compiler/tf2xla/functionalize_cond.cc +++ b/tensorflow/compiler/tf2xla/functionalize_cond.cc @@ -695,6 +695,12 @@ Status Conditional::BuildIfNode(Graph* graph, VLOG(3) << "Build output type: " << DataTypeVectorString(out_type); builder.Attr("Tcond", DT_BOOL); + string outside_compilation; + if (GetNodeAttr(predicate_.node->def(), kXlaOutsideCompilationAttrName, + &outside_compilation) + .ok()) { + builder.Attr(kXlaOutsideCompilationAttrName, outside_compilation); + } builder.Device(predicate_.node->assigned_device_name()); // Conditional should be the first input ... builder.Input(NodeDefBuilder::NodeOut(predicate_.node->name(), diff --git a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc index 0362682bd6a8d0977bb09854ef448075fba99273..f818d80022da0bad851c896f2714c15b20b22195 100644 --- a/tensorflow/compiler/tf2xla/functionalize_control_flow.cc +++ b/tensorflow/compiler/tf2xla/functionalize_control_flow.cc @@ -116,7 +116,7 @@ Status FunctionalizeControlFlowForFunction( std::vector>> nodes_to_associated_functions; for (auto* n : g->nodes()) { - auto associated_functions = GetAssociatedFunctions(*n, flr); + auto associated_functions = GetAssociatedFunctions(*n, fld); if (!associated_functions.empty()) { nodes_to_associated_functions.push_back({n, associated_functions}); } @@ -226,9 +226,13 @@ Status FunctionalizeControlFlowPass::Run( pflr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice); // Find XLA compile ops and its corresponding FunctionDef. 
+ // TPUCompile op is not in the map because graph rewriting might happen + // multiple times, and we want to avoid functionalize it again. static std::map* kNodeTypeToFunctionAttrMapping = new std::map{ - {"TPUCompile", "function"}, + // TPUReplicate ops are generated by EncapsulateTPUComputationsPass. + {"TPUReplicate", "computation"}, + // XlaLaunch ops are generated by EncapsulateXlaComputationsPass. {"XlaLaunch", "function"}, }; std::map> canonicalized_name_to_new_name; diff --git a/tensorflow/compiler/tf2xla/functionalize_while.cc b/tensorflow/compiler/tf2xla/functionalize_while.cc index 7c3ad448ef546dd1ab2640a57d7d1d73ca3768ad..d87436a7b4ac37c74d0f0df921779c8716290013 100644 --- a/tensorflow/compiler/tf2xla/functionalize_while.cc +++ b/tensorflow/compiler/tf2xla/functionalize_while.cc @@ -523,6 +523,12 @@ Status FunctionalizeLoop(const FunctionLibraryDefinition* lookup_library, builder.Attr("T", arg_types); builder.Attr("cond", cond_name); builder.Attr("body", body_name); + string outside_compilation; + if (GetNodeAttr(frame->loop_cond->def(), kXlaOutsideCompilationAttrName, + &outside_compilation) + .ok()) { + builder.Attr(kXlaOutsideCompilationAttrName, outside_compilation); + } std::vector inputs; for (int i = 0; i < frame->args.size(); ++i) { const Arg& arg = frame->args[i]; diff --git a/tensorflow/compiler/tf2xla/kernels/image_ops.cc b/tensorflow/compiler/tf2xla/kernels/image_ops.cc index 921b4340c0ac674a5ad7d17aaf54f1cf36975151..6713d6bc921b24b25baddfb3fd7296fffcc3d6ea 100644 --- a/tensorflow/compiler/tf2xla/kernels/image_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/image_ops.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "tensorflow/compiler/tf2xla/kernels/gather_op_helpers.h" #include "tensorflow/compiler/tf2xla/lib/util.h" +#include "tensorflow/compiler/tf2xla/lib/while_loop.h" #include "tensorflow/compiler/tf2xla/xla_helpers.h" #include "tensorflow/compiler/tf2xla/xla_op_kernel.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" @@ -316,6 +318,70 @@ class AdjustHueOp : public XlaOpKernel { }; REGISTER_XLA_OP(Name("AdjustHue"), AdjustHueOp); +struct WhileCondFn { + const int64 num_boxes; + const int64 output_size; + + explicit WhileCondFn(int64 num_boxes, int64 output_size) + : num_boxes(num_boxes), output_size(output_size) {} + + xla::StatusOr operator()(absl::Span values, + xla::XlaBuilder* cond_builder) const { + xla::XlaOp row_idx = values[0]; + xla::XlaOp row_in_bounds = + xla::Lt(row_idx, xla::ConstantR0(cond_builder, num_boxes)); + xla::XlaOp num_outputs_so_far = values[1]; + xla::XlaOp results_not_full = xla::Lt( + num_outputs_so_far, xla::ConstantR0(cond_builder, output_size)); + return xla::And(row_in_bounds, results_not_full); + } +}; + +// Process the boxes one-by-one using the iou matrix mask. +// This implementation uses a correct, but greedy, sequential algorithm +// to ensure that suppressed boxes cannot themselves suppress other +// boxes. +struct SuppressBodyFn { + const int64 num_boxes; + + explicit SuppressBodyFn(int64 num_boxes) : num_boxes(num_boxes) {} + + xla::StatusOr> operator()( + absl::Span values, xla::XlaBuilder* builder) const { + auto row_idx = values[0]; + auto num_outputs_so_far = values[1]; + auto iou_mask = values[2]; + auto included_iou = values[3]; + auto zero_r1 = xla::ConstantR1(builder, {0}); + // Determine if current elem is active using a slice. 
+ auto row_idx_r1 = xla::Reshape(row_idx, {1}); + auto active_elem = xla::DynamicSlice(included_iou, row_idx_r1, {1}); + active_elem = xla::Reshape(active_elem, {}); + // Increment output count iff current elem is not suppressed. + num_outputs_so_far = xla::Select( + active_elem, num_outputs_so_far + xla::ConstantR0(builder, 1), + num_outputs_so_far); + // Slice out the row_idx. + auto starts = xla::ConcatInDim(builder, {row_idx_r1, zero_r1}, 0); + auto row_iou = xla::DynamicSlice(iou_mask, starts, {1, num_boxes}); + // Remove the diagonal from consideration. An elem cannot suppress + // itself. + auto update_starts = xla::ConcatInDim(builder, {zero_r1, row_idx_r1}, 0); + row_iou = xla::DynamicUpdateSlice( + row_iou, xla::ConstantR2FromArray2D(builder, {{false}}), + update_starts); + // Create a suppression by inverting polarity. + row_iou = xla::Reshape(row_iou, {num_boxes}); + auto supp_mask = xla::Not(row_iou); + // Update mask iff current elem is not suppressed. + included_iou = xla::Select(xla::Broadcast(active_elem, {num_boxes}), + xla::And(included_iou, supp_mask), included_iou); + row_idx = row_idx + xla::ConstantR0(builder, 1); + return std::vector{row_idx, num_outputs_so_far, iou_mask, + included_iou}; + } +}; + class NonMaxSuppressionOp : public XlaOpKernel { public: explicit NonMaxSuppressionOp(OpKernelConstruction* context) @@ -326,14 +392,12 @@ class NonMaxSuppressionOp : public XlaOpKernel { void Compile(XlaOpKernelContext* context) override { // TODO(b/111646731): Improve scalability of this op, using blocking. 
- int num_boxes_dim = 0; - int coords_dim = 1; const TensorShape& boxes_shape = context->InputShape("boxes"); OP_REQUIRES(context, TensorShapeUtils::IsMatrix(boxes_shape), errors::InvalidArgument("boxes must be 2-D, currently: ", boxes_shape.DebugString())); - const int64 num_boxes = boxes_shape.dim_size(num_boxes_dim); - OP_REQUIRES(context, boxes_shape.dim_size(coords_dim) == 4, + const int64 num_boxes = boxes_shape.dim_size(0); + OP_REQUIRES(context, boxes_shape.dim_size(1) == 4, errors::InvalidArgument("boxes must have 4 columns", boxes_shape.DebugString())); const TensorShape& scores_shape = context->InputShape("scores"); @@ -347,9 +411,13 @@ class NonMaxSuppressionOp : public XlaOpKernel { OP_REQUIRES(context, pad_to_max_output_size_, errors::InvalidArgument( "XLA compilation requires pad_to_max_output_size == True")); + OP_REQUIRES(context, num_boxes <= kint32max, + errors::InvalidArgument("XLA compilation requires number of " + "boxes to be <= kint32max, got ", + num_boxes)); - xla::XlaOp boxes = context->Input("boxes"); - xla::XlaOp scores = context->Input("scores"); + const xla::XlaOp boxes_input = context->Input("boxes"); + const xla::XlaOp scores_input = context->Input("scores"); int64 output_size; OP_REQUIRES_OK(context, context->ConstantInputAsIntScalar(2, &output_size)); OP_REQUIRES( @@ -358,90 +426,113 @@ class NonMaxSuppressionOp : public XlaOpKernel { OP_REQUIRES(context, output_size <= kint32max, errors::InvalidArgument("Need output_size <= kint32Max, got ", output_size)); - xla::XlaOp score_thresh = context->Input("score_threshold"); - xla::XlaOp iou_thresh = context->Input("iou_threshold"); - + const xla::XlaOp score_thresh = context->Input("score_threshold"); + const xla::XlaOp iou_thresh = context->Input("iou_threshold"); xla::XlaBuilder* const builder = context->builder(); // Choose a more convenient layout. 
- xla::XlaOp boxes_t = xla::Transpose(boxes, {1, 0}); - coords_dim = 0; - num_boxes_dim = 1; - - // Shapes are henceforth [1, num_boxes]. - xla::XlaOp coord_y0 = xla::SliceInDim(boxes_t, - /*start_index=*/0, - /*limit_index=*/1, - /*stride=*/1, - /*dimno=*/coords_dim); - xla::XlaOp coord_x0 = xla::SliceInDim(boxes_t, - /*start_index=*/1, - /*limit_index=*/2, - /*stride=*/1, - /*dimno=*/coords_dim); - xla::XlaOp coord_y1 = xla::SliceInDim(boxes_t, - /*start_index=*/2, - /*limit_index=*/3, - /*stride=*/1, - /*dimno=*/coords_dim); - xla::XlaOp coord_x1 = xla::SliceInDim(boxes_t, - /*start_index=*/3, - /*limit_index=*/4, - /*stride=*/1, - /*dimno=*/coords_dim); - xla::XlaOp y1 = - xla::Select(xla::Le(coord_y0, coord_y1), coord_y0, coord_y1); - xla::XlaOp y2 = - xla::Select(xla::Le(coord_y0, coord_y1), coord_y1, coord_y0); - xla::XlaOp x1 = - xla::Select(xla::Le(coord_x0, coord_x1), coord_x0, coord_x1); - xla::XlaOp x2 = - xla::Select(xla::Le(coord_x0, coord_x1), coord_x1, coord_x0); + const xla::XlaOp boxes = xla::Transpose(boxes_input, {1, 0}); + const xla::XlaOp boxes_sorted = xla::GetTupleElement( + xla::Sort(/*keys=*/-xla::Broadcast(scores_input, {4}), + /*values=*/{boxes}, + /*dimension=*/1), + 1); + // Track the mapping of indices into sorted domain. + const xla::XlaOp iota_indices = xla::Iota(builder, xla::S32, num_boxes); + const xla::XlaOp indices_sort = xla::Sort(-scores_input, {iota_indices}); + const xla::XlaOp indices_sorted = xla::GetTupleElement(indices_sort, 1); + const xla::XlaOp scores = xla::Neg(xla::GetTupleElement(indices_sort, 0)); + + // Shapes are henceforth [1, num_boxes]. 'c_y0' denotes 'coordinate' y0. 
+ const xla::XlaOp c_y0 = xla::Reshape(xla::SliceInDim(boxes_sorted, + /*start_index=*/0, + /*limit_index=*/1, + /*stride=*/1, + /*dimno=*/0), + {num_boxes}); + const xla::XlaOp c_x0 = xla::Reshape(xla::SliceInDim(boxes_sorted, + /*start_index=*/1, + /*limit_index=*/2, + /*stride=*/1, + /*dimno=*/0), + {num_boxes}); + const xla::XlaOp c_y1 = xla::Reshape(xla::SliceInDim(boxes_sorted, + /*start_index=*/2, + /*limit_index=*/3, + /*stride=*/1, + /*dimno=*/0), + {num_boxes}); + const xla::XlaOp c_x1 = xla::Reshape(xla::SliceInDim(boxes_sorted, + /*start_index=*/3, + /*limit_index=*/4, + /*stride=*/1, + /*dimno=*/0), + {num_boxes}); + + xla::XlaOp y1 = xla::Select(xla::Le(c_y0, c_y1), c_y0, c_y1); + xla::XlaOp y2 = xla::Select(xla::Le(c_y0, c_y1), c_y1, c_y0); + xla::XlaOp x1 = xla::Select(xla::Le(c_x0, c_x1), c_x0, c_x1); + xla::XlaOp x2 = xla::Select(xla::Le(c_x0, c_x1), c_x1, c_x0); xla::XlaOp area = (y2 - y1) * (x2 - x1); - // Transpose the 1xN tensors, instead of the NxN tensors. - xla::XlaOp y1_t = xla::Transpose(y1, {1, 0}); - xla::XlaOp y2_t = xla::Transpose(y2, {1, 0}); - xla::XlaOp x1_t = xla::Transpose(x1, {1, 0}); - xla::XlaOp x2_t = xla::Transpose(x2, {1, 0}); - xla::XlaOp area_t = xla::Transpose(area, {1, 0}); + // Shapes are henceforth [1, num_boxes]. + y1 = xla::Broadcast(y1, {1}); + y2 = xla::Broadcast(y2, {1}); + x1 = xla::Broadcast(x1, {1}); + x2 = xla::Broadcast(x2, {1}); + area = xla::Broadcast(area, {1}); // Shapes are henceforth [num_boxes, num_boxes]. 
- xla::XlaOp i_xmin = xla::Max(x1, x1_t); - xla::XlaOp i_ymin = xla::Max(y1, y1_t); - xla::XlaOp i_xmax = xla::Min(x2, x2_t); - xla::XlaOp i_ymax = xla::Min(y2, y2_t); + xla::XlaOp i_xmin = xla::Max(x1, xla::Transpose(x1, {1, 0})); + xla::XlaOp i_ymin = xla::Max(y1, xla::Transpose(y1, {1, 0})); + xla::XlaOp i_xmax = xla::Min(x2, xla::Transpose(x2, {1, 0})); + xla::XlaOp i_ymax = xla::Min(y2, xla::Transpose(y2, {1, 0})); auto square_zero = xla::ZerosLike(i_xmin); xla::XlaOp i_area = xla::Max(i_xmax - i_xmin, square_zero) * xla::Max(i_ymax - i_ymin, square_zero); - xla::XlaOp u_area = area + area_t - i_area; + xla::XlaOp u_area = area + xla::Transpose(area, {1, 0}) - i_area; xla::XlaOp iou = i_area / u_area; xla::XlaOp iou_thresh_mask = xla::Gt(iou, iou_thresh + square_zero); - xla::XlaOp scores_2d = xla::Reshape(scores, {num_boxes, 1}); - xla::XlaOp score_cmp_mask = - xla::Gt(scores_2d, xla::Transpose(scores_2d, {1, 0})); - xla::XlaOp suppress = xla::And(iou_thresh_mask, score_cmp_mask); - - // Shapes are [num_boxes] after the reduce. 
- xla::XlaOp included_iou = xla::Not(xla::Reduce( - suppress, - /*init_value=*/xla::ConstantR0(builder, false), - /*computation=*/CreateScalarOrComputation(xla::PRED, builder), - /*dimensions_to_reduce=*/{0})); + xla::XlaOp included_iou = + xla::Broadcast(xla::ConstantR0(builder, true), {num_boxes}); + + std::vector init_values; + init_values.reserve(4); + init_values.push_back(xla::ConstantR0(builder, 0)); // col_idx + init_values.push_back(xla::ConstantR0(builder, 0)); // num_outputs + init_values.push_back(iou_thresh_mask); + init_values.push_back(included_iou); + + auto suppress_loop_result = + XlaWhileLoop(WhileCondFn(num_boxes, output_size), + SuppressBodyFn(num_boxes), init_values, "suppress_loop", + builder) + .ValueOrDie(); + xla::XlaOp included_score = xla::Gt(scores, xla::Broadcast(score_thresh, {num_boxes})); - xla::XlaOp included = xla::And(included_iou, included_score); + xla::XlaOp included = xla::And(included_score, suppress_loop_result[3]); + + // Only consider boxes over which we have iterated. This allows for accurate + // counting. DynamicSlice would require knowledge of the size of the output. + auto valid_elem = xla::Lt( + iota_indices, xla::Broadcast(suppress_loop_result[0], {num_boxes})); + included = xla::And(included, valid_elem); + xla::XlaOp neg_inf = xla::Broadcast(xla::MinValue(builder, xla::F32), {num_boxes}); xla::XlaOp scores_included = xla::Select(included, scores, neg_inf); - + xla::XlaOp output_tuple = TopK(scores_included, output_size); + xla::XlaOp selected_indices_sorted = xla::GetTupleElement(output_tuple, 1); + // Calculate num_valid. + // Note: num_valid cannot be taken from the loop outputs, because outputs + // can be suppressed by score threshold. xla::XlaOp ones_included = xla::Select( included, xla::Broadcast(xla::ConstantR0(builder, 1), {num_boxes}), xla::Broadcast(xla::ConstantR0(builder, 0), {num_boxes})); - // num_valid is scalar. Value should be bound by output_size. 
xla::XlaOp num_valid_total = xla::Reduce( ones_included, @@ -451,8 +542,17 @@ class NonMaxSuppressionOp : public XlaOpKernel { xla::XlaOp num_valid = xla::Min(num_valid_total, xla::ConstantR0(builder, output_size)); - xla::XlaOp output_tuple = TopK(scores_included, output_size); - xla::XlaOp selected_indices = xla::GetTupleElement(output_tuple, 1); + // Re-index into the original scores input tensor, using a Gather. + // Boxes were suppressed in the sorted domain. + xla::XlaOp selected_indices; + DataType gather_type = context->expected_output_dtype(0); + OP_REQUIRES_OK( + context, + XlaGather(indices_sorted, scores_shape, selected_indices_sorted, + TensorShape({output_size}), + /*axis=*/0, + /*indices_are_nd=*/false, + /*dtype=*/gather_type, DT_INT32, builder, &selected_indices)); context->SetOutput(0, selected_indices); context->SetOutput(1, num_valid); diff --git a/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc b/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc index 3d81ae9eb89a80e5b89b180ad77521c5ed15e79d..f210bfbd886e48b8d7972393ed1899491486646c 100644 --- a/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc +++ b/tensorflow/compiler/tf2xla/kernels/index_ops_cpu.cc @@ -88,20 +88,30 @@ class ArgMaxCustomCallOp : public XlaOpKernel { xla::ConstantLiteral(&b, xla::LiteralUtil::CreateR0(dim))); } - xla::Shape xla_shape = - xla::ShapeUtil::MakeShape(xla::S64, output_shape.dim_sizes()); + // The argmax function expects row-major layout. 
+ xla::Shape xla_shape = xla::ShapeUtil::MakeShapeWithDescendingLayout( + xla::S64, output_shape.dim_sizes()); + std::vector arg_shapes; + for (const xla::XlaOp& arg : args) { + auto shape_status = b.GetShape(arg); + OP_REQUIRES_OK(ctx, shape_status.status()); + xla::Shape arg_shape = shape_status.ConsumeValueOrDie(); + *arg_shape.mutable_layout() = xla::LayoutUtil::MakeDescendingLayout( + xla::ShapeUtil::Rank(arg_shape)); + arg_shapes.push_back(std::move(arg_shape)); + } // Tell XLA to call the custom code, defined in // index_ops_kernel_argmax_float_1d.cc. xla::XlaOp output; switch (input_shape.dims()) { case 1: - output = - xla::CustomCall(&b, "argmax_float_1d_xla_impl", args, xla_shape); + output = xla::CustomCallWithLayout(&b, "argmax_float_1d_xla_impl", args, + xla_shape, arg_shapes); break; case 2: - output = - xla::CustomCall(&b, "argmax_float_2d_xla_impl", args, xla_shape); + output = xla::CustomCallWithLayout(&b, "argmax_float_2d_xla_impl", args, + xla_shape, arg_shapes); break; default: OP_REQUIRES(ctx, false, diff --git a/tensorflow/compiler/tf2xla/kernels/permute_op.cc b/tensorflow/compiler/tf2xla/kernels/permute_op.cc index 0764e5503db583351e92a144b2c361e8875161d3..3ca5eecf1a811aca9ad9201ba285d2112db7533e 100644 --- a/tensorflow/compiler/tf2xla/kernels/permute_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/permute_op.cc @@ -78,7 +78,7 @@ class DataFormatVecPermuteOp : public XlaOpKernel { keys = xla::BroadcastInDim( keys, xla::ShapeUtil::MakeShape(xla::S32, {4, 2}), {0}); } - auto sorted = xla::Sort(keys, ctx->Input(0), 0); + auto sorted = xla::Sort(keys, {ctx->Input(0)}, 0); auto output = xla::GetTupleElement(sorted, 1); ctx->SetOutput(0, output); } diff --git a/tensorflow/compiler/tf2xla/kernels/random_ops.cc b/tensorflow/compiler/tf2xla/kernels/random_ops.cc index afd5986846705f66eb4c7ced9dbe2f4757f5af7f..7ef6fa305b7f5b5aae187808f856a9273f101e14 100644 --- a/tensorflow/compiler/tf2xla/kernels/random_ops.cc +++ 
b/tensorflow/compiler/tf2xla/kernels/random_ops.cc @@ -135,7 +135,7 @@ class RandomShuffleOp : public XlaOpKernel { xla::XlaOp curr = input; for (int i = 0; i < rounds; ++i) { xla::XlaOp keys = xla::RngUniform(zero, max_value, key_shape); - xla::XlaOp sorted = xla::Sort(keys, curr); + xla::XlaOp sorted = xla::Sort(keys, {curr}); curr = xla::GetTupleElement(sorted, 1); } diff --git a/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc b/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc index 8102faad28db71075fb8da269c55edbdb667193e..8eee5b12991fb377203d780cecd8916952bd699a 100644 --- a/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/reduce_window_op.cc @@ -40,10 +40,16 @@ class ReduceWindowOp : public XlaOpKernel { std::vector window_dimensions; std::vector window_strides; + std::vector base_dilations; + std::vector window_dilations; OP_REQUIRES_OK(context, context->ConstantInputAsIntVector( "window_dimensions", &window_dimensions)); OP_REQUIRES_OK(context, context->ConstantInputAsIntVector("window_strides", &window_strides)); + OP_REQUIRES_OK(context, context->ConstantInputAsIntVector("base_dilations", + &base_dilations)); + OP_REQUIRES_OK(context, context->ConstantInputAsIntVector( + "window_dilations", &window_dilations)); const int rank = input_shape.dims(); OP_REQUIRES(context, rank == window_dimensions.size(), @@ -56,6 +62,16 @@ class ReduceWindowOp : public XlaOpKernel { "The size of window_strides must be equal to the input " "rank (", window_strides.size(), " vs. ", rank, ")")); + OP_REQUIRES(context, rank == base_dilations.size(), + errors::InvalidArgument( + "The size of base_dilations must be equal to the input " + "rank (", + base_dilations.size(), " vs. ", rank, ")")); + OP_REQUIRES(context, rank == window_dilations.size(), + errors::InvalidArgument( + "The size of window_dilations must be equal to the input " + "rank (", + window_dilations.size(), " vs. 
", rank, ")")); // Build the reducer function. XlaCompiler::Argument reducer_arg; @@ -102,7 +118,8 @@ class ReduceWindowOp : public XlaOpKernel { xla::XlaOp output = xla::ReduceWindowWithGeneralPadding( context->Input(0), context->Input(1), *reducer.computation, - window_dimensions, window_strides, padding); + window_dimensions, window_strides, base_dilations, window_dilations, + padding); context->SetOutput(0, output); } @@ -115,6 +132,8 @@ class ReduceWindowOp : public XlaOpKernel { REGISTER_XLA_OP(Name("XlaReduceWindow") .CompileTimeConstInput("window_dimensions") .CompileTimeConstInput("window_strides") + .CompileTimeConstInput("base_dilations") + .CompileTimeConstInput("window_dilations") .CompileTimeConstInput("padding"), ReduceWindowOp); diff --git a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc index ab094d7dd1ce9856a3c2854fd2776827d6c4b76f..57afd608de820573821d605cadcc8779474b5fd6 100644 --- a/tensorflow/compiler/tf2xla/kernels/scan_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/scan_ops.cc @@ -104,7 +104,8 @@ class ScanOp : public XlaOpKernel { } auto output = xla::ReduceWindowWithGeneralPadding( XlaHelpers::ConvertElementType(builder, ctx->Input(0), dtype), init, - *reducer, window_dims, window_strides, padding); + *reducer, window_dims, window_strides, + /*base_dilations=*/{}, /*window_dilations=*/{}, padding); output = XlaHelpers::ConvertElementType(builder, output, ctx->input_type(0)); diff --git a/tensorflow/compiler/tf2xla/kernels/sort_ops.cc b/tensorflow/compiler/tf2xla/kernels/sort_ops.cc index 45f03d8c2175fc8b425b329b90893bb54d7f1d87..6cfdf4a5ae479e9851454df97160754f122bc6ff 100644 --- a/tensorflow/compiler/tf2xla/kernels/sort_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/sort_ops.cc @@ -38,7 +38,7 @@ class XlaKeyValueSortOp : public XlaOpKernel { void Compile(XlaOpKernelContext* context) override { xla::XlaOp result = - xla::Sort(context->Input("keys"), context->Input("values")); + 
xla::Sort(context->Input("keys"), {context->Input("values")}); context->SetOutput(0, xla::GetTupleElement(result, 0)); context->SetOutput(1, xla::GetTupleElement(result, 1)); } diff --git a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc index 94108b764fd32fc77520f9a8ea16065c27e6accf..06a560d9471c352065ef7e9f6903ebdca542f5b1 100644 --- a/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/tensor_array_ops.cc @@ -123,9 +123,10 @@ Status GetTensorArrayShape(const XlaResource* resource, xla::XlaOp DynamicAddSlice(xla::XlaBuilder* builder, const xla::XlaOp& operand, const xla::XlaOp& update, absl::Span update_dims, - const xla::XlaOp& start_indices) { + const xla::XlaOp& start_indices, DataType dtype) { xla::XlaOp current = xla::DynamicSlice(operand, start_indices, update_dims); - xla::XlaOp sum = xla::Add(current, update); + xla::XlaOp sum = + dtype == DT_BOOL ? xla::Or(current, update) : xla::Add(current, update); return xla::DynamicUpdateSlice(operand, sum, start_indices); } @@ -222,9 +223,16 @@ class TensorArrayWriteOp : public XlaOpKernel { slice_shape.InsertDim(0, 1LL); auto update = xla::Reshape(value, slice_shape.dim_sizes()); - xla::XlaOp written = - DynamicAddSlice(b, ta, update, slice_shape.dim_sizes(), start_indices); - + xla::XlaOp written; + if (resource->tensor_array_multiple_writes_aggregate()) { + written = DynamicAddSlice(b, ta, update, slice_shape.dim_sizes(), + start_indices, dtype_); + } else { + // TODO(b/117569591): Ideally we would report an error in the case that we + // see multiple writes to the same offset. Unfortunately there is no way + // to report errors at the moment, so we silently overwrite. 
+ written = xla::DynamicUpdateSlice(ta, update, start_indices); + } OP_REQUIRES_OK(ctx, resource->SetValue(written)); ctx->SetOutput(0, flow); } @@ -391,7 +399,11 @@ class TensorArrayScatterOp : public XlaOpKernel { } if (scatter_all_elements_in_order) { - ta = xla::Add(ta, value); + if (dtype_ == DT_BOOL) { + ta = xla::Or(ta, value); + } else { + ta = xla::Add(ta, value); + } } else { auto slice_dims = value_shape.dim_sizes(); slice_dims[0] = 1LL; @@ -414,7 +426,7 @@ class TensorArrayScatterOp : public XlaOpKernel { auto start_indices = xla::Pad(xla::Reshape(index, {1}), xla::ConstantR0(b, 0), xla::MakeEdgePaddingConfig({{0, elem_shape.dims()}})); - ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices); + ta = DynamicAddSlice(b, ta, slice, slice_dims, start_indices, dtype_); } } @@ -522,8 +534,13 @@ class TensorArraySplitOp : public XlaOpKernel { value_shape.DebugString(), " vs. ", ta_shape.DebugString())); - OP_REQUIRES_OK(ctx, resource->SetValue(xla::Add( - ta, xla::Reshape(value, ta_shape.dim_sizes())))); + const xla::XlaOp reshape = xla::Reshape(value, ta_shape.dim_sizes()); + if (dtype_ == DT_BOOL) { + ta = xla::Or(ta, reshape); + } else { + ta = xla::Add(ta, reshape); + } + OP_REQUIRES_OK(ctx, resource->SetValue(ta)); ctx->SetOutput(0, flow); } diff --git a/tensorflow/compiler/tf2xla/kernels/tile_ops.cc b/tensorflow/compiler/tf2xla/kernels/tile_ops.cc index 93d5996b5eaf10221b1d7067e7650b78cd6b8fef..52f2b36e19edd96f491f6706d1872e0d3af2df3b 100644 --- a/tensorflow/compiler/tf2xla/kernels/tile_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/tile_ops.cc @@ -96,7 +96,11 @@ class TileOp : public XlaOpKernel { // operation broadcast semantics. 
auto broadcasted_zero = xla::Broadcast( XlaHelpers::Zero(ctx->builder(), ctx->input_type(0)), output_shape); - ctx->SetOutput(0, xla::Add(broadcasted_zero, input)); + if (ctx->input_type(0) == DT_BOOL) { + ctx->SetOutput(0, xla::Or(broadcasted_zero, input)); + } else { + ctx->SetOutput(0, xla::Add(broadcasted_zero, input)); + } return; } diff --git a/tensorflow/compiler/tf2xla/ops/xla_ops.cc b/tensorflow/compiler/tf2xla/ops/xla_ops.cc index 557911553dab5609d774eee91c8e3d0f48489ef9..bd2c0a5ee88869ba60701c0a7ace05857452eed9 100644 --- a/tensorflow/compiler/tf2xla/ops/xla_ops.cc +++ b/tensorflow/compiler/tf2xla/ops/xla_ops.cc @@ -283,6 +283,8 @@ REGISTER_OP("XlaReduceWindow") .Input("init_value: T") .Input("window_dimensions: Tindices") .Input("window_strides: Tindices") + .Input("base_dilations: Tindices") + .Input("window_dilations: Tindices") .Input("padding: Tindices") .Attr("T: numbertype") .Attr("Tindices: {int32, int64}") diff --git a/tensorflow/compiler/tf2xla/python/xla.py b/tensorflow/compiler/tf2xla/python/xla.py index bc7924c37175e009a35614830ef7f82552b80838..5e86b5d8ec0a2690f004bc67decea09185d9cbb6 100644 --- a/tensorflow/compiler/tf2xla/python/xla.py +++ b/tensorflow/compiler/tf2xla/python/xla.py @@ -320,6 +320,8 @@ def reduce_window(operand, reducer, window_dimensions, window_strides=None, + base_dilations=None, + window_dilations=None, padding=None, name=None): """Wraps the XLA ReduceWindow operator. @@ -343,12 +345,16 @@ def reduce_window(operand, A tensor that represents the output of the reduce_window operator. 
""" window_strides = window_strides or [1] * len(window_dimensions) + base_dilations = base_dilations or [1] * len(window_dimensions) + window_dilations = window_dilations or [1] * len(window_dimensions) padding = padding or [(0, 0)] * len(window_dimensions) return gen_xla_ops.xla_reduce_window( input=operand, init_value=init, window_dimensions=window_dimensions, window_strides=window_strides, + base_dilations=base_dilations, + window_dilations=window_dilations, padding=padding, computation=reducer, name=name) diff --git a/tensorflow/compiler/tf2xla/side_effect_util.cc b/tensorflow/compiler/tf2xla/side_effect_util.cc index 6cd7b24592f30d7202b985f3dfd082ea2d85e344..b233e6b2c28e1968bb74901fc684e808ae45ab60 100644 --- a/tensorflow/compiler/tf2xla/side_effect_util.cc +++ b/tensorflow/compiler/tf2xla/side_effect_util.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/compiler/tf2xla/side_effect_util.h" +#include "absl/strings/numbers.h" #include "tensorflow/core/graph/algorithm.h" namespace tensorflow { @@ -64,4 +65,28 @@ bool HasSideEffectingNodes(const Graph& g) { return false; } +Status ParseHostComputeCoreList(absl::Span list_from_attr, + std::map* host_compute_core) { + for (const auto& hc_core : list_from_attr) { + std::vector parts = str_util::Split(hc_core, ":"); + if (parts.size() != 2) { + return errors::InvalidArgument( + "Malformed host_compute_core entry ", hc_core, + " should be :."); + } + int core; + if (!absl::numbers_internal::safe_strto32_base(parts[1], &core, 10)) { + return errors::InvalidArgument("Malformed host_compute_core entry ", + hc_core, + " part after ':' should be an integer."); + } + if (host_compute_core->find(parts[0]) != host_compute_core->end()) { + return errors::InvalidArgument( + "Duplicate host_compute_core entry for cluster ", parts[0]); + } + (*host_compute_core)[parts[0]] = core; + } + return Status::OK(); +} + } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/side_effect_util.h 
b/tensorflow/compiler/tf2xla/side_effect_util.h index ad07624729f0b0d2443b2fc43d32dfa3377ce115..f22ddb2f58e1fa5c10ca0fdb956d9136942388b7 100644 --- a/tensorflow/compiler/tf2xla/side_effect_util.h +++ b/tensorflow/compiler/tf2xla/side_effect_util.h @@ -42,6 +42,12 @@ std::set CalculateTokenInputsForOutputToken(const Graph& g); // Returns whether a graph contains side-effecting nodes. bool HasSideEffectingNodes(const Graph& g); +// Parse the mapping from outside_compilation_subgraph name to core number, +// which is specified in an attr as a list of strings +// :. +Status ParseHostComputeCoreList(absl::Span list_from_attr, + std::map* host_compute_core); + } // namespace tensorflow #endif // TENSORFLOW_COMPILER_TF2XLA_SIDE_EFFECT_UTIL_H_ diff --git a/tensorflow/compiler/tf2xla/test_util.cc b/tensorflow/compiler/tf2xla/test_util.cc index f31bfb45a2f4db270446eb59259969dc0ab63a8e..3c6c9a91b6d2fb47f6dee1c347e9b852f1eea3ec 100644 --- a/tensorflow/compiler/tf2xla/test_util.cc +++ b/tensorflow/compiler/tf2xla/test_util.cc @@ -40,12 +40,4 @@ Status InstantiateFunctionForTest(const string& name, return Status::OK(); } -std::unordered_map BuildNodeIndex(const Graph& graph) { - std::unordered_map index; - for (Node* node : graph.nodes()) { - index[node->name()] = node; - } - return index; -} - } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/test_util.h b/tensorflow/compiler/tf2xla/test_util.h index 350a868568531c0d073e0cf600327d1ff9d62e3a..4ffc94ae3bc7c930720cd625a7856443c77be666 100644 --- a/tensorflow/compiler/tf2xla/test_util.h +++ b/tensorflow/compiler/tf2xla/test_util.h @@ -44,9 +44,6 @@ Status InstantiateFunctionForTest(const string& name, const FunctionLibraryDefinition& library, InstantiationResultForTest* result); -// Builds a map from node name to Node* for `graph`. 
-std::unordered_map BuildNodeIndex(const Graph& graph); - } // namespace tensorflow // Variant of TF_EXPECT_GRAPH_EQ that also compares internal attributes for diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.cc b/tensorflow/compiler/tf2xla/tf2xla_util.cc index 01dd3ba10fec85e6b1d411fbd32fbf9c58b5fe11..cc83db0562dd4ef1ae7b7a718a8f2e407acbfa1e 100644 --- a/tensorflow/compiler/tf2xla/tf2xla_util.cc +++ b/tensorflow/compiler/tf2xla/tf2xla_util.cc @@ -330,8 +330,8 @@ uint32 GetXLARandomSeed() { // TODO(b/77601805): add tests for associated function related stuff. bool HasAssociatedFunction(const NodeDef& node_def, - FunctionLibraryRuntime* flr) { - if (flr->GetFunctionLibraryDefinition()->Contains(node_def.op())) { + const FunctionLibraryDefinition* fld) { + if (fld->Contains(node_def.op())) { return true; } @@ -351,10 +351,10 @@ bool HasAssociatedFunction(const NodeDef& node_def, } std::vector GetAssociatedFunctions( - const Node& node, FunctionLibraryRuntime* flr) { + const Node& node, const FunctionLibraryDefinition* fld) { std::vector results; const string& op = node.type_string(); - if (flr->GetFunctionLibraryDefinition()->Contains(op)) { + if (fld->Contains(op)) { // This is a function call node. 
AttrValueMap attrs(node.attrs().begin(), node.attrs().end()); results.emplace_back(AssociatedFunctionInfo::FunctionCall(op, attrs)); @@ -441,4 +441,28 @@ Status RewriteAssociatedFunction( return Status::OK(); } +Status CachedFunctionHandles::GetOrInstantiate( + const string& func_name, AttrSlice attrs, + FunctionLibraryRuntime::Handle* handle) { + string canonicalized_name = Canonicalize(func_name, attrs); + auto iter = handles_.find(canonicalized_name); + if (iter != handles_.end()) { + *handle = iter->second; + return Status::OK(); + } + + TF_RETURN_IF_ERROR(flr_->Instantiate(func_name, attrs, handle)); + handles_[canonicalized_name] = *handle; + return Status::OK(); +} + +Status CachedFunctionHandles::ReleaseAllHandles() { + Status result; + for (auto iter : handles_) { + result.Update(flr_->ReleaseHandle(iter.second)); + } + handles_.clear(); + return result; +} + } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/tf2xla_util.h b/tensorflow/compiler/tf2xla/tf2xla_util.h index 53eab8b63e2fc8aa3dfb0bacfe065897ca775bd0..b974b998229982afc9168dcaf0799cfddd965a04 100644 --- a/tensorflow/compiler/tf2xla/tf2xla_util.h +++ b/tensorflow/compiler/tf2xla/tf2xla_util.h @@ -120,7 +120,7 @@ class AssociatedFunctionInfo { // Returns if the NodeDef has associated function. bool HasAssociatedFunction(const NodeDef& node_def, - FunctionLibraryRuntime* flr); + const FunctionLibraryDefinition* fld); // Gets functions associated with the node. Current cases: // 1. For function call node, its function name; @@ -128,7 +128,7 @@ bool HasAssociatedFunction(const NodeDef& node_def, // and returned attrs will be this node's attributes; // 3. For nodes like XlaWhile/XlaIf, all their function attributes. std::vector GetAssociatedFunctions( - const Node& node, FunctionLibraryRuntime* flr); + const Node& node, const FunctionLibraryDefinition* fld); // Changes associated functions for the node. Current cases: // 1. 
For function call node, creates a new node with the new function name and @@ -144,6 +144,30 @@ Status RewriteAssociatedFunction( // Attribute to mark nodes to be executed on host. extern const char kXlaOutsideCompilationAttrName[]; +// Class to act as cache for FunctionLibraryRuntime::Handle objects. +class CachedFunctionHandles { + public: + CachedFunctionHandles(FunctionLibraryRuntime* flr) : flr_(flr) {} + + // Populates `handle` for requested function and attributes. If we have + // instantiated the function with the same attributes before, `handle` will be + // cached handle; otherwise instantiate the function and populate `handle`. + Status GetOrInstantiate(const string& func_name, AttrSlice attrs, + FunctionLibraryRuntime::Handle* handle); + + // Releases all handles in the cache. Returns first non-OK status if any; + // returns OK otherwise. + Status ReleaseAllHandles(); + + ~CachedFunctionHandles() { ReleaseAllHandles().IgnoreError(); } + + private: + FunctionLibraryRuntime* flr_; + std::map handles_; + + TF_DISALLOW_COPY_AND_ASSIGN(CachedFunctionHandles); +}; + } // namespace tensorflow #endif // TENSORFLOW_COMPILER_TF2XLA_TF2XLA_UTIL_H_ diff --git a/tensorflow/compiler/tf2xla/tf2xla_util_test.cc b/tensorflow/compiler/tf2xla/tf2xla_util_test.cc index 68441b3d4790b17bd06accff3fcdc8ccee79bbb7..202e929315cacd4d6cdfc69d50639d8a427ec6c2 100644 --- a/tensorflow/compiler/tf2xla/tf2xla_util_test.cc +++ b/tensorflow/compiler/tf2xla/tf2xla_util_test.cc @@ -23,11 +23,15 @@ limitations under the License. 
#include "tensorflow/cc/ops/function_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/compiler/tf2xla/sharding_util.h" +#include "tensorflow/core/common_runtime/graph_optimizer.h" +#include "tensorflow/core/common_runtime/process_function_library_runtime.h" +#include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/version.h" namespace tensorflow { namespace { @@ -255,5 +259,75 @@ TEST(SetNodeShardingFromNeighbors, Basic) { EXPECT_EQ(1, parse_status.ValueOrDie().value().tile_assignment_devices(0)); } +REGISTER_OP("One") + .Output("y: T") + .Attr("T: {float, double, int32, int64}") + .Doc(R"doc( +Returns a tensor with a single element (1) of type T. + +y: A scalar in type T. + +)doc"); + +// Tests that CachedFunctionHandles class works. +TEST(CachedFunctionHandles, Basic) { + FunctionDef func = FunctionDefHelper::Define( + // Name + "TestFunc", + // Args + {}, + // Return values + {"y:T"}, + // Attr def + {"T:{float, double, int32, int64}"}, + // Nodes + { + {{"y"}, "One", {}, {{"T", "$T"}}}, + }); + FunctionDefLibrary proto; + *proto.add_function() = func; + FunctionLibraryDefinition fld(OpRegistry::Global(), proto); + std::unique_ptr pflr( + new ProcessFunctionLibraryRuntime( + /*device_mgr=*/nullptr, Env::Default(), TF_GRAPH_DEF_VERSION, &fld, + OptimizerOptions())); + FunctionLibraryRuntime* flr = + pflr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice); + + CachedFunctionHandles cached_function_handles(flr); + + // Tests that GetOrInstantiate() works. 
+ FunctionLibraryRuntime::Handle first_handle; + AttrValue attr; + attr.set_type(DT_FLOAT); + AttrValueMap attrs; + attrs["T"] = attr; + TF_ASSERT_OK(cached_function_handles.GetOrInstantiate( + "TestFunc", AttrSlice(&attrs), &first_handle)); + + // Tests that we can get FunctionBody. + const FunctionBody* body = flr->GetFunctionBody(first_handle); + EXPECT_NE(body, nullptr); + + // Tests that GetOrInstantiate() returns cached handle when called with same + // function name and attributes. + FunctionLibraryRuntime::Handle second_handle; + TF_ASSERT_OK(cached_function_handles.GetOrInstantiate( + "TestFunc", AttrSlice(&attrs), &second_handle)); + EXPECT_EQ(first_handle, second_handle); + + // Tests that GetOrInstantiate() returns new handle when called with same + // function name but different attributes. + attr.set_type(DT_INT32); + attrs["T"] = attr; + FunctionLibraryRuntime::Handle third_handle; + TF_ASSERT_OK(cached_function_handles.GetOrInstantiate( + "TestFunc", AttrSlice(&attrs), &third_handle)); + EXPECT_NE(first_handle, third_handle); + + // Tests that ReleaseAllHandles() works. 
+ TF_EXPECT_OK(cached_function_handles.ReleaseAllHandles()); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/xla_context.cc b/tensorflow/compiler/tf2xla/xla_context.cc index f247570d72c0287a33695de3d778cce2a2418921..2095a6b8099f48a867ec2c7c7d6e84d8f2426dce 100644 --- a/tensorflow/compiler/tf2xla/xla_context.cc +++ b/tensorflow/compiler/tf2xla/xla_context.cc @@ -138,7 +138,8 @@ Status XlaContext::CreateResource( const std::set& tensor_array_gradients, XlaResource** resource) { resources_.emplace_back( new XlaResource(kind, arg_num, std::move(name), type, std::move(shape), - handle, tensor_array_size, tensor_array_gradients)); + handle, tensor_array_size, tensor_array_gradients, + /*tensor_array_multiple_writes_aggregate=*/false)); *resource = resources_.back().get(); return Status::OK(); } diff --git a/tensorflow/compiler/tf2xla/xla_resource.cc b/tensorflow/compiler/tf2xla/xla_resource.cc index 56c2e01055665954b99ea635e56666fbd8b96026..63b09c8f02a60e91576544d13227d29f56d3e88c 100644 --- a/tensorflow/compiler/tf2xla/xla_resource.cc +++ b/tensorflow/compiler/tf2xla/xla_resource.cc @@ -29,7 +29,8 @@ namespace tensorflow { XlaResource::XlaResource(Kind kind, int arg_num, string name, DataType type, TensorShape shape, const xla::XlaOp& initial_value, int64 tensor_array_size, - const std::set& tensor_array_gradients) + const std::set& tensor_array_gradients, + bool tensor_array_multiple_writes_aggregate) : kind_(kind), arg_num_(arg_num), name_(std::move(name)), @@ -37,14 +38,17 @@ XlaResource::XlaResource(Kind kind, int arg_num, string name, DataType type, shape_(std::move(shape)), value_(initial_value), initial_value_(initial_value), - tensor_array_size_(tensor_array_size) { + tensor_array_size_(tensor_array_size), + tensor_array_multiple_writes_aggregate_( + tensor_array_multiple_writes_aggregate) { CHECK(kind_ != kInvalid); for (const string& gradient : tensor_array_gradients) { tensor_array_gradients_[gradient].reset(new 
XlaResource( /*kind=*/kTensorArray, /*arg_num=*/-1, /*name=*/absl::StrCat("TensorArrayGrad: ", name_), type_, shape_, - xla::XlaOp(), tensor_array_size_, /*tensor_array_gradients=*/{})); + xla::XlaOp(), tensor_array_size_, /*tensor_array_gradients=*/{}, + /*tensor_array_multiple_writes_aggregate=*/true)); } } @@ -137,7 +141,8 @@ Status XlaResource::GetOrCreateTensorArrayGradient(const string& source, new XlaResource(/*kind=*/kTensorArray, /*arg_num=*/-1, /*name=*/absl::StrCat("TensorArrayGrad: ", name_), type_, shape_, gradient_value, tensor_array_size_, - /*tensor_array_gradients=*/{})); + /*tensor_array_gradients=*/{}, + /*tensor_array_multiple_writes_aggregate=*/true)); } *gradient_out = gradient.get(); return Status::OK(); diff --git a/tensorflow/compiler/tf2xla/xla_resource.h b/tensorflow/compiler/tf2xla/xla_resource.h index 2438490be13809b9f3571a362900b44cb838e76b..aa9ce1b171f11ea0de4db0123098729c1c97f93a 100644 --- a/tensorflow/compiler/tf2xla/xla_resource.h +++ b/tensorflow/compiler/tf2xla/xla_resource.h @@ -39,7 +39,8 @@ class XlaResource { XlaResource(Kind kind, int arg_num, string name, DataType type, TensorShape shape, const xla::XlaOp& initial_value, int64 tensor_array_size, - const std::set& tensor_array_gradients); + const std::set& tensor_array_gradients, + bool tensor_array_multiple_writes_aggregate); XlaResource(const XlaResource&) = delete; XlaResource(XlaResource&&) = delete; @@ -113,6 +114,8 @@ class XlaResource { const xla::XlaOp& pack, xla::XlaBuilder* builder); // TensorArray and Stack specific fields + // TODO(phawkins): refactor this code to use subclasses, rather than putting + // kind-specific fields in XlaResource. // 'tensor_array_size' stores the expected size of the TensorArray or Stack. 
// We need to store this since sometimes TensorArrays must be initialized @@ -121,6 +124,10 @@ class XlaResource { int64 tensor_array_size() const { return tensor_array_size_; } void set_tensor_array_size(int64 size) { tensor_array_size_ = size; } + bool tensor_array_multiple_writes_aggregate() const { + return tensor_array_multiple_writes_aggregate_; + } + // 'tensor_array_gradient' is a map from TensorArrayGradV3 'source' attributes // to an XlaResource containing the gradient TensorArrays. We store a pointer // here since there should only be one gradient TensorArray per 'source' @@ -143,6 +150,7 @@ class XlaResource { xla::XlaOp initial_value_; int64 tensor_array_size_ = -1; + bool tensor_array_multiple_writes_aggregate_ = false; std::map> tensor_array_gradients_; }; diff --git a/tensorflow/compiler/xla/client/lib/sorting.cc b/tensorflow/compiler/xla/client/lib/sorting.cc index a904be259a3870a679b2c4699ec01e2a11b1ce46..0475fd9c94f6e390b5169cfe2cbba8eae28ddc18 100644 --- a/tensorflow/compiler/xla/client/lib/sorting.cc +++ b/tensorflow/compiler/xla/client/lib/sorting.cc @@ -29,7 +29,7 @@ XlaOp TopK(XlaOp input, int64 k) { auto input_dims = input_shape.dimensions(); std::vector broadcast_dims(input_dims.begin(), input_dims.end() - 1); XlaOp broadcast_s32 = Broadcast(iota_s32, broadcast_dims); - XlaOp sort_result = Sort(Neg(input), broadcast_s32); + XlaOp sort_result = Sort(Neg(input), {broadcast_s32}); std::vector start_indices(input_shape.dimensions_size(), 0); std::vector limit_indices(input_dims.begin(), input_dims.end()); limit_indices[last_dim] = k; diff --git a/tensorflow/compiler/xla/client/lib/testing.cc b/tensorflow/compiler/xla/client/lib/testing.cc index ff0ec76a7f9b62fce0f14beae688cb0dd74847a1..a44681f586278bf03f3fb2b8c812936cbf3ad47b 100644 --- a/tensorflow/compiler/xla/client/lib/testing.cc +++ b/tensorflow/compiler/xla/client/lib/testing.cc @@ -93,9 +93,9 @@ std::unique_ptr MakeFakeDataOrDie(const Shape& shape, std::vector> MakeFakeArgumentsOrDie( 
const XlaComputation& computation, Client* client) { - CHECK(computation.proto().has_program_shape()) + CHECK(computation.proto().has_host_program_shape()) << "Computation should have progran shape."; - auto program_shape = computation.proto().program_shape(); + auto program_shape = computation.proto().host_program_shape(); std::vector> results; for (const Shape& shape : program_shape.parameters()) { diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index d196252db16fe84d44824856a2202c1a5d3fce95..7d081b27222bd31ddbe7c64b4dea8a4d5a371acb 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -278,7 +278,7 @@ StatusOr XlaBuilder::Build(int64 root_id) { module->set_id(entry.id()); module->set_entry_computation_name(entry.name()); module->set_entry_computation_id(entry.id()); - *module->mutable_program_shape() = entry.program_shape(); + *module->mutable_host_program_shape() = entry.program_shape(); for (auto& e : embedded_) { module->add_computations()->Swap(&e.second); } @@ -1279,9 +1279,10 @@ XlaOp XlaBuilder::AfterAll(absl::Span tokens) { }); } -XlaOp XlaBuilder::CustomCall(const string& call_target_name, - absl::Span operands, - const Shape& shape, const string& opaque) { +XlaOp XlaBuilder::CustomCall( + const string& call_target_name, absl::Span operands, + const Shape& shape, const string& opaque, + absl::optional> operand_shapes_with_layout) { return ReportErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; if (absl::StartsWith(call_target_name, "$")) { @@ -1293,6 +1294,31 @@ XlaOp XlaBuilder::CustomCall(const string& call_target_name, *instr.mutable_shape() = shape; instr.set_custom_call_target(call_target_name); instr.set_custom_call_opaque(opaque); + if (operand_shapes_with_layout.has_value()) { + if (!LayoutUtil::HasLayout(shape)) { + return InvalidArgument( + "Result shape must have layout for custom call with constrained " + 
"layout."); + } + if (operands.size() != operand_shapes_with_layout->size()) { + return InvalidArgument( + "Must specify a shape with layout for each operand for custom call " + "with constrained layout; given %d shapes, expected %d", + operand_shapes_with_layout->size(), operands.size()); + } + instr.set_constrain_layout(true); + int64 operand_num = 0; + for (const Shape& operand_shape : *operand_shapes_with_layout) { + if (!LayoutUtil::HasLayout(operand_shape)) { + return InvalidArgument( + "No layout specified for operand %d for custom call with " + "constrained layout.", + operand_num); + } + *instr.add_operand_shapes_with_layout() = operand_shape; + ++operand_num; + } + } return AddInstruction(std::move(instr), HloOpcode::kCustomCall, operands); }); } @@ -1468,18 +1494,17 @@ XlaOp XlaBuilder::Rev(const XlaOp& operand, }); } -XlaOp XlaBuilder::Sort(XlaOp keys, absl::optional values, +XlaOp XlaBuilder::Sort(const XlaOp& keys, absl::Span values, int64 dimension) { return ReportErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; std::vector operand_shape_ptrs; TF_ASSIGN_OR_RETURN(const Shape& keys_shape, GetShape(keys)); operand_shape_ptrs.push_back(&keys_shape); - Shape values_shape; - if (values.has_value()) { - TF_ASSIGN_OR_RETURN(values_shape, GetShape(*values)); - operand_shape_ptrs.push_back(&values_shape); - } + TF_ASSIGN_OR_RETURN(std::vector values_shapes, + GetOperandShapes(values)); + absl::c_transform(values_shapes, std::back_inserter(operand_shape_ptrs), + [](const Shape& shape) { return &shape; }); TF_ASSIGN_OR_RETURN(*instr.mutable_shape(), ShapeInference::InferVariadicOpShape( HloOpcode::kSort, operand_shape_ptrs)); @@ -1488,10 +1513,9 @@ XlaOp XlaBuilder::Sort(XlaOp keys, absl::optional values, dimension = ShapeUtil::Rank(keys_shape) - 1; } instr.add_dimensions(dimension); - return values.has_value() - ? 
AddInstruction(std::move(instr), HloOpcode::kSort, - {keys, *values}) - : AddInstruction(std::move(instr), HloOpcode::kSort, {keys}); + std::vector operands{keys}; + operands.insert(operands.end(), values.begin(), values.end()); + return AddInstruction(std::move(instr), HloOpcode::kSort, operands); }); } @@ -1789,9 +1813,9 @@ XlaOp XlaBuilder::ReduceWindow(const XlaOp& operand, const XlaOp& init_value, std::vector> padding_values = MakePadding(AsInt64Slice(operand_shape.dimensions()), window_dimensions, window_strides, padding); - return ReduceWindowWithGeneralPadding(operand, init_value, computation, - window_dimensions, window_strides, - padding_values); + return ReduceWindowWithGeneralPadding( + operand, init_value, computation, window_dimensions, window_strides, + /*base_dilations=*/{}, /*window_dilations=*/{}, padding_values); }); } @@ -1800,6 +1824,8 @@ XlaOp XlaBuilder::ReduceWindowWithGeneralPadding( const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding) { return ReportErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; @@ -1810,7 +1836,8 @@ XlaOp XlaBuilder::ReduceWindowWithGeneralPadding( computation.GetProgramShape()); TF_ASSIGN_OR_RETURN(*instr.mutable_window(), MakeWindow(window_dimensions, window_strides, padding, - /*lhs_dilation=*/{}, /*rhs_dilation=*/{})); + /*lhs_dilation=*/base_dilations, + /*rhs_dilation=*/window_dilations)); TF_ASSIGN_OR_RETURN( *instr.mutable_shape(), ShapeInference::InferReduceWindowShape(operand_shape, init_shape, @@ -2330,7 +2357,7 @@ StatusOr XlaBuilder::BuildConstantSubGraph( module->set_id(entry.id()); module->set_entry_computation_name(entry.name()); module->set_entry_computation_id(entry.id()); - *module->mutable_program_shape() = *program_shape; + *module->mutable_host_program_shape() = *program_shape; for (auto& e : embedded_) { if (related_calls.find(e.second.id()) != related_calls.end()) { 
*module->add_computations() = e.second; @@ -2687,7 +2714,16 @@ XlaOp Call(XlaBuilder* builder, const XlaComputation& computation, XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name, absl::Span operands, const Shape& shape, const string& opaque) { - return builder->CustomCall(call_target_name, operands, shape, opaque); + return builder->CustomCall(call_target_name, operands, shape, opaque, + /*operand_shapes_with_layout=*/absl::nullopt); +} + +XlaOp CustomCallWithLayout(XlaBuilder* builder, const string& call_target_name, + absl::Span operands, const Shape& shape, + absl::Span operand_shapes_with_layout, + const string& opaque) { + return builder->CustomCall(call_target_name, operands, shape, opaque, + operand_shapes_with_layout); } XlaOp Complex(const XlaOp& real, const XlaOp& imag, @@ -2800,10 +2836,12 @@ XlaOp ReduceWindowWithGeneralPadding( const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding) { return operand.builder()->ReduceWindowWithGeneralPadding( operand, init_value, computation, window_dimensions, window_strides, - padding); + base_dilations, window_dilations, padding); } XlaOp CrossReplicaSum(const XlaOp& operand, @@ -2914,8 +2952,8 @@ XlaOp Rev(const XlaOp& operand, absl::Span dimensions) { return operand.builder()->Rev(operand, dimensions); } -XlaOp Sort(XlaOp keys, absl::optional values, int64 dimension) { - return keys.builder()->Sort(keys, std::move(values), dimension); +XlaOp Sort(const XlaOp& keys, absl::Span values, int64 dimension) { + return keys.builder()->Sort(keys, values, dimension); } XlaOp Clamp(const XlaOp& min, const XlaOp& operand, const XlaOp& max) { diff --git a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index cd0d5ca5d3043ca13bbfda40eacc04b86659a85c..5747661c34b411bbf22575f9c1d9fe09aa32911f 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h 
+++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -577,9 +577,10 @@ class XlaBuilder { absl::Span operands); // Enqueues a custom call instruction onto the computation. - XlaOp CustomCall(const string& call_target_name, - absl::Span operands, const Shape& shape, - const string& opaque); + XlaOp CustomCall( + const string& call_target_name, absl::Span operands, + const Shape& shape_with_layout, const string& opaque, + absl::optional> operand_shapes_with_layout); // The following methods enqueue element-wise binary arithmetic operations // onto the computation. The shapes of the operands have to match unless one @@ -671,6 +672,8 @@ class XlaBuilder { const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding); // Returns the sum of the operand value within each subgroup of replicas. All @@ -696,7 +699,7 @@ class XlaBuilder { // the same channel_id, they will be 'Allreduce'd. If empty, Allreduce will // not be applied cross modules. // - // TODO(b/79737069): Rename this to AllReduce when it's ready to use. + // TODO(b/117564385): Rename this to AllReduce when it's ready to use. XlaOp CrossReplicaSum( const XlaOp& operand, const XlaComputation& computation, absl::Span replica_groups = {}, @@ -831,12 +834,12 @@ class XlaBuilder { // the last dimension is chosen by default. // // If both keys and values are provided: - // * The keys and the values must tensors with the same dimensions. The + // * The keys and all values must be tensors with the same dimensions. The // element types of the tensors may be different. // * The result is a tuple that consists of a sorted tensor of keys (along the - // provided dimension, as above) as the first element, and a tensor with their - // corresponding values as the second element. 
- XlaOp Sort(XlaOp keys, absl::optional values = absl::nullopt, + // provided dimension, as above) as the first element, and tensors with their + // corresponding values as the other elements. + XlaOp Sort(const XlaOp& keys, absl::Span values = {}, int64 dimension = -1); // Enqueues a clamp instruction onto the computation. @@ -1195,6 +1198,10 @@ class XlaBuilder { friend XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name, absl::Span operands, const Shape& shape, const string& opaque); + friend XlaOp CustomCallWithLayout( + XlaBuilder* builder, const string& call_target_name, + absl::Span operands, const Shape& shape_with_layout, + absl::Span operand_shapes_with_layout, const string& opaque); friend XlaOp Complex(const XlaOp& real, const XlaOp& imag, absl::Span broadcast_dimensions); friend XlaOp Conj(const XlaOp& operand); @@ -1245,6 +1252,8 @@ class XlaBuilder { const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding); friend XlaOp CrossReplicaSum(const XlaOp& operand, absl::Span replica_groups); @@ -1302,7 +1311,8 @@ class XlaBuilder { friend XlaOp Transpose(const XlaOp& operand, absl::Span permutation); friend XlaOp Rev(const XlaOp& operand, absl::Span dimensions); - friend XlaOp Sort(XlaOp keys, absl::optional values, int64 dimension); + friend XlaOp Sort(const XlaOp& keys, absl::Span values, + int64 dimension); friend XlaOp Clamp(const XlaOp& min, const XlaOp& operand, const XlaOp& max); friend XlaOp Map(XlaBuilder* builder, absl::Span operands, const XlaComputation& computation, @@ -1728,6 +1738,17 @@ XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name, absl::Span operands, const Shape& shape, const string& opaque = ""); +// Overload which constructs a custom call with fixed layouts. 
The operands will +// have the layouts specified by |operand_shapes_with_layout| when provided to +// external code, and the external code is expected to produce a result with the +// layout specified by |shape_with_layout|. All shapes in |shape_with_layout| +// and |operand_shapes_with_layout| must have layouts. +XlaOp CustomCallWithLayout(XlaBuilder* builder, const string& call_target_name, + absl::Span operands, + const Shape& shape_with_layout, + absl::Span operand_shapes_with_layout, + const string& opaque = ""); + // The following methods enqueue element-wise binary arithmetic operations // onto the computation. The shapes of the operands have to match unless one // of the operands is a scalar, or an explicit broadcast dimension is given @@ -1818,6 +1839,8 @@ XlaOp ReduceWindowWithGeneralPadding( const XlaComputation& computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding); // Returns the sum of the operand value within each subgroup of replicas. All @@ -1842,7 +1865,7 @@ XlaOp CrossReplicaSum(const XlaOp& operand, // same channel_id, they will be 'Allreduce'd. If empty, Allreduce will not be // applied cross modules. // -// TODO(b/79737069): Rename this to AllReduce when it's ready to use. +// TODO(b/117564385): Rename this to AllReduce when it's ready to use. XlaOp CrossReplicaSum( const XlaOp& operand, const XlaComputation& computation, absl::Span replica_groups = {}, @@ -1980,12 +2003,12 @@ XlaOp Rev(const XlaOp& operand, absl::Span dimensions); // the last dimension is chosen by default. // // If both keys and values are provided: -// * The keys and the values must tensors with the same dimensions. The +// * The keys and all values must be tensors with the same dimensions. The // element types of the tensors may be different. 
// * The result is a tuple that consists of a sorted tensor of keys (along the -// provided dimension, as above) as the first element, and a tensor with their -// corresponding values as the second element. -XlaOp Sort(XlaOp keys, absl::optional values = absl::nullopt, +// provided dimension, as above) as the first element, and tensors with their +// corresponding values as the other elements. +XlaOp Sort(const XlaOp& keys, absl::Span values = {}, int64 dimension = -1); // Enqueues a clamp instruction onto the computation. diff --git a/tensorflow/compiler/xla/client/xla_computation.cc b/tensorflow/compiler/xla/client/xla_computation.cc index 22c9e83bb2ae9e3e205bdd480b64c703e31c6ffd..c9870b65b91c1ebd7d44143faf215a2d5c2a2fc5 100644 --- a/tensorflow/compiler/xla/client/xla_computation.cc +++ b/tensorflow/compiler/xla/client/xla_computation.cc @@ -24,8 +24,8 @@ limitations under the License. namespace xla { StatusOr XlaComputation::GetProgramShape() const { - TF_RET_CHECK(proto_.has_program_shape()); - return proto_.program_shape(); + TF_RET_CHECK(proto_.has_host_program_shape()); + return proto_.host_program_shape(); } StatusOr> XlaComputation::Snapshot() const { diff --git a/tensorflow/compiler/xla/layout_util.cc b/tensorflow/compiler/xla/layout_util.cc index d310335618ded7b581e6ed632223218585bb791f..19667b7ed9d47896afd9a82a41de7997538b089b 100644 --- a/tensorflow/compiler/xla/layout_util.cc +++ b/tensorflow/compiler/xla/layout_util.cc @@ -65,6 +65,12 @@ void SetDefaultLayoutToContainer( return layout; } +/* static */ Layout LayoutUtil::MakeDescendingLayout(int64 rank) { + std::vector layout(rank); + std::iota(layout.rbegin(), layout.rend(), static_cast(0)); + return MakeLayout(layout); +} + /* static */ Layout LayoutUtil::MakeLayoutFromMajorToMinor( absl::Span major_to_minor) { Layout layout; @@ -199,7 +205,7 @@ Layout CreateDefaultLayoutForRank(int64 rank) { return Status::OK(); } - if (layout.format() == INVALID_FORMAT) { + if (layout.format() == INVALID_FORMAT || 
!Format_IsValid(layout.format())) { return InvalidArgument( "Layout does not have a valid format: layout {%s}, shape {%s}", layout.ShortDebugString(), shape.ShortDebugString()); diff --git a/tensorflow/compiler/xla/layout_util.h b/tensorflow/compiler/xla/layout_util.h index b78883c2d870043032306637730c4666665125a8..af032b1cae4c5645d6c7da55b779cd0a7336592e 100644 --- a/tensorflow/compiler/xla/layout_util.h +++ b/tensorflow/compiler/xla/layout_util.h @@ -40,6 +40,10 @@ class LayoutUtil { static Layout MakeLayoutFromMajorToMinor( absl::Span major_to_minor); + // Returns a layout with descending ((i.e. {n, n-1, ..., 0}) minor-to-major + // dimensions. + static Layout MakeDescendingLayout(int64 rank); + // Creates a sparse layout with the given maximum number of elements. (This is // a convenience function for protobuf construction.) static Layout MakeSparseLayout(int64 max_sparse_elements); diff --git a/tensorflow/compiler/xla/python/local_computation_builder.cc b/tensorflow/compiler/xla/python/local_computation_builder.cc index cd5fd330298fb0ff158e232dac121f8ffb271218..92df404b8ec0aed4899906877a4dd41102bdf7a0 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.cc +++ b/tensorflow/compiler/xla/python/local_computation_builder.cc @@ -532,10 +532,13 @@ LocalOp LocalComputationBuilder::ReduceWindowWithGeneralPadding( const LocalComputation& local_computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span> padding) { return xla::ReduceWindowWithGeneralPadding( operand.op(), init_value.op(), local_computation.computation(), - window_dimensions, window_strides, padding); + window_dimensions, window_strides, base_dilations, window_dilations, + padding); } LocalOp LocalComputationBuilder::RngNormal(const LocalOp& mu, @@ -569,13 +572,13 @@ StatusOr LocalComputationBuilder::IsConstant(const LocalOp& operand) { } LocalOp LocalComputationBuilder::Sort(const LocalOp& operand, int64 
dimension) { - return xla::Sort(operand.op(), absl::nullopt, dimension); + return xla::Sort(operand.op(), {}, dimension); } LocalOp LocalComputationBuilder::SortKeyVal(const LocalOp& keys, const LocalOp& values, int64 dimension) { - return xla::Sort(keys.op(), values.op(), dimension); + return xla::Sort(keys.op(), {values.op()}, dimension); } StatusOr LocalComputationBuilder::BuildConstantSubGraph( diff --git a/tensorflow/compiler/xla/python/local_computation_builder.h b/tensorflow/compiler/xla/python/local_computation_builder.h index 2166bb6721ca380f3180a8802e4922f2e9e45945..43332e0abd410c08dc5a40f7de39dbc96d34a72c 100644 --- a/tensorflow/compiler/xla/python/local_computation_builder.h +++ b/tensorflow/compiler/xla/python/local_computation_builder.h @@ -278,6 +278,8 @@ class LocalComputationBuilder { const LocalComputation& local_computation, absl::Span window_dimensions, absl::Span window_strides, + absl::Span base_dilations, + absl::Span window_dilations, absl::Span > padding); LocalOp RngNormal(const LocalOp& mu, const LocalOp& sigma, diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index bb303c5678a2cac9a9e78925e857ab25c0c6d9be..f8197488fb3bacb312cc7fbf149b773851992b8a 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -995,7 +995,30 @@ class ComputationBuilder(object): window_strides) return self._client.ReduceWindowWithGeneralPadding( operand, init_value, computation_to_apply.c_local_computation, - window_dimensions, window_strides, pads) + window_dimensions, window_strides, (), (), pads) + + def ReduceWindowWithGeneralPadding( + self, operand, init_value, computation_to_apply, window_dimensions, + window_strides, base_dilations, window_dilations, padding): + """Enqueues a windowed reduction operation onto the computation. + + Args: + operand: reduction operand (LocalOp). + init_value: reduction initial value (LocalOp). 
+ computation_to_apply: a binary reduction function (Computation). + window_dimensions: dimensions of window (sequence of integers). + window_strides: strides for window (sequence of integers). + base_dilations: dilations for the base (sequence of integers). + window_dilations: dilations for window (sequence of integers). + padding: length-N array-like of pairs of integers of (low, high) padding. + + Returns: + A LocalOp representing the added ReduceWindow op. + """ + return self._client.ReduceWindowWithGeneralPadding( + operand, init_value, computation_to_apply.c_local_computation, + window_dimensions, window_strides, base_dilations, window_dilations, + padding) def RngNormal(self, mu, sigma, dims): """Enqueues an RngNormal operation onto the computation. diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 2b292ed0537acf4d0c0c4831b19a66ffbb279605..7d03eba800f6882efae448e3e41c488c513f4a84 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -294,6 +294,7 @@ cc_library( srcs = [ "dfs_hlo_visitor.cc", "hlo_computation.cc", + "hlo_input_output_alias_config.cc", "hlo_instruction.cc", "hlo_instructions.cc", "hlo_module.cc", @@ -308,6 +309,7 @@ cc_library( "hlo_clone_context.h", "hlo_computation.h", "hlo_domain_metadata.h", + "hlo_input_output_alias_config.h", "hlo_instruction.h", "hlo_instructions.h", "hlo_module.h", @@ -858,6 +860,7 @@ cc_library( ":executable", ":hlo", ":hlo_module_config", + ":hlo_module_group", ":logical_buffer", "//tensorflow/compiler/xla:statusor", "//tensorflow/compiler/xla:types", @@ -1268,6 +1271,25 @@ tf_cc_test( ], ) +tf_cc_test( + name = "hlo_input_output_alias_config_test", + srcs = ["hlo_input_output_alias_config_test.cc"], + deps = [ + ":hlo", + ":hlo_dce", + ":hlo_memory_scheduler", + ":hlo_ordering", + ":hlo_parser", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto", + 
"//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:test", + "@com_google_absl//absl/algorithm:container", + ], +) + cc_library( name = "hlo_memory_scheduler", srcs = ["hlo_memory_scheduler.cc"], @@ -3127,6 +3149,7 @@ cc_library( ":buffer_assignment", ":hlo", ":hlo_proto", + ":hlo_verifier", "//tensorflow/compiler/xla:status", "//tensorflow/compiler/xla:util", ], diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 75dae7a7141647d7b7b60b0e07e11c143621ea63..ca71f2cc129fc5d14e454c98a6e5ebf2e94cd7d2 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -2057,6 +2057,12 @@ Status AlgebraicSimplifierVisitor::HandleReduceWindow( return Status::OK(); } + // Bail on dilation. + if (window_util::HasDilation(window)) { + VLOG(10) << "Not folding pad into reduce-window as there is dilation."; + return Status::OK(); + } + VLOG(10) << "Considering folding Pad: " << pad->ToString() << "\ninto reduce-window: " << reduce_window->ToString() << (convert != nullptr @@ -2203,7 +2209,7 @@ Status AlgebraicSimplifierVisitor::HandleSort(HloInstruction* sort) { } // If it is key/value sort, the output of sort is a tuple. 
return ReplaceWithNewInstruction( - sort, HloInstruction::CreateTuple({operand, sort->mutable_operand(1)})); + sort, HloInstruction::CreateTuple(sort->operands())); } return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 2047f894b465816eb97ba205e79033bd52bf7a0c..42d1f337dc22b91dcef4eb8ed4c0c57c6febeb70 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2133,16 +2133,20 @@ TEST_F(AlgebraicSimplifierTest, ReplaceEffectiveScalarKeyValueSortWithTuple) { Shape values_shape = ShapeUtil::MakeShape(S32, {5, 0}); auto keys = builder.AddInstruction( HloInstruction::CreateParameter(0, keys_shape, "keys")); - auto values = builder.AddInstruction( - HloInstruction::CreateParameter(1, values_shape, "values")); + auto values0 = builder.AddInstruction( + HloInstruction::CreateParameter(1, values_shape, "values0")); + auto values1 = builder.AddInstruction( + HloInstruction::CreateParameter(2, values_shape, "values1")); builder.AddInstruction(HloInstruction::CreateSort( - ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, values)); + ShapeUtil::MakeTupleShape({keys_shape, values_shape, values_shape}), 0, + keys, {values0, values1})); auto module = CreateNewModule(); HloComputation* computation = module->AddEntryComputation(builder.Build()); AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, non_bitcasting_callback()); ASSERT_TRUE(simplifier.Run(module).ValueOrDie()); - EXPECT_THAT(computation->root_instruction(), op::Tuple(keys, values)); + EXPECT_THAT(computation->root_instruction(), + op::Tuple(keys, values0, values1)); } // Used for TEST_Ps that test merging (or not) of a kPad instruction into a diff --git a/tensorflow/compiler/xla/service/allocation_tracker.cc b/tensorflow/compiler/xla/service/allocation_tracker.cc index 
1ed6142dcecdc830cb7b8386e0cc20a2ea54aa7f..ef5e211646e7b0b66b8e6c09948be58063422943 100644 --- a/tensorflow/compiler/xla/service/allocation_tracker.cc +++ b/tensorflow/compiler/xla/service/allocation_tracker.cc @@ -176,13 +176,13 @@ StatusOr> AllocationTracker::DeconstructTuple( } StatusOr> AllocationTracker::Resolve( - const GlobalDataHandle& data) { + const GlobalDataHandle& data) const { tensorflow::mutex_lock lock(mutex_); return AllocationTracker::ResolveInternal(data); } StatusOr AllocationTracker::ResolveForReplica( - const GlobalDataHandle& data, int replica_id) { + const GlobalDataHandle& data, int replica_id) const { tensorflow::mutex_lock lock(mutex_); TF_ASSIGN_OR_RETURN(std::vector replicated_buffers, ResolveInternal(data)); @@ -196,7 +196,7 @@ StatusOr AllocationTracker::ResolveForReplica( } StatusOr> AllocationTracker::ResolveInternal( - const GlobalDataHandle& data) { + const GlobalDataHandle& data) const { VLOG(2) << "resolve:" << data.handle(); auto it = handle_to_shaped_buffers_.find(data.handle()); if (it == handle_to_shaped_buffers_.end()) { diff --git a/tensorflow/compiler/xla/service/allocation_tracker.h b/tensorflow/compiler/xla/service/allocation_tracker.h index 43feccee3c67152c6f61098bb98d546379848b8c..98d1a302a9f66f4a00e05d62837a79133e222687 100644 --- a/tensorflow/compiler/xla/service/allocation_tracker.h +++ b/tensorflow/compiler/xla/service/allocation_tracker.h @@ -65,13 +65,13 @@ class AllocationTracker { // replica, or provide an error status to say whether any of those buffers // were not found (or found, but found deallocated). StatusOr> Resolve( - const GlobalDataHandle& data); + const GlobalDataHandle& data) const; // Resolves a handle from an XLA client and replica id to a shaped buffer, or // provide an error status to say whether it was not found (or found, but // found deallocated). 
StatusOr ResolveForReplica(const GlobalDataHandle& data, - int replica_id); + int replica_id) const; private: // Data structure encapsulating single memory allocation on the device. @@ -87,7 +87,7 @@ class AllocationTracker { // Internal helper which resolves the given GlobalDataHandle to a // list of ScopedShapedBuffers. StatusOr> ResolveInternal( - const GlobalDataHandle& data) EXCLUSIVE_LOCKS_REQUIRED(mutex_); + const GlobalDataHandle& data) const EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Internal helper which registers a vector of shaped buffers, one per // replica. ShapedBufferTy is either ScopedShapedBuffer or ShapedBuffer. If @@ -113,7 +113,7 @@ class AllocationTracker { // maintained per device ordinal. using AllocationMap = absl::flat_hash_map; - tensorflow::mutex mutex_; + mutable tensorflow::mutex mutex_; // Backend to use with this tracker. The backend supplies the memory allocator // to use when deallocating memory. diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization.cc b/tensorflow/compiler/xla/service/bfloat16_normalization.cc index d5b1148058898596bfdb837826a590bbc74e202a..1251f0258f5d43a490ad654f519fee9076590453 100644 --- a/tensorflow/compiler/xla/service/bfloat16_normalization.cc +++ b/tensorflow/compiler/xla/service/bfloat16_normalization.cc @@ -231,6 +231,10 @@ Status BFloat16NormalizationVisitor::HandleMultipleOutputs( for (auto* user : materialized_users) { TF_RETURN_IF_ERROR(hlo->ReplaceUseWith(user, tuple)); } + bool is_root = computation_->root_instruction() == hlo; + if (is_root) { + computation_->set_root_instruction(tuple); + } *tuple->mutable_shape() = original_shape; return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc index cef0eba14e9dd463d6c32b047211bf25a84478f6..cb075a5e38a5ea9db2ceb432b2b59f8db5e2e640 100644 --- a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc +++ 
b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc @@ -284,7 +284,7 @@ TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleSort) { HloInstruction::CreateParameter(1, s32_shape, "value")); HloInstruction* sort = builder.AddInstruction(HloInstruction::CreateSort( - ShapeUtil::MakeTupleShape({bf16_shape, s32_shape}), 0, key, value)); + ShapeUtil::MakeTupleShape({bf16_shape, s32_shape}), 0, key, {value})); HloInstruction* gte = builder.AddInstruction( HloInstruction::CreateGetTupleElement(bf16_shape, sort, 0)); @@ -298,6 +298,30 @@ TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleSort) { EXPECT_EQ(ShapeUtil::GetSubshape(sort->shape(), {0}).element_type(), F32); } +TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleSortRoot) { + auto module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + Shape f32_shape = ShapeUtil::MakeShape(F32, {1024}); + Shape bf16_shape = ShapeUtil::MakeShape(BF16, {1024}); + + HloInstruction* key = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32_shape, "key")); + HloInstruction* value = builder.AddInstruction( + HloInstruction::CreateParameter(1, bf16_shape, "value")); + + HloInstruction* sort = builder.AddInstruction(HloInstruction::CreateSort( + ShapeUtil::MakeTupleShape({bf16_shape, bf16_shape}), 0, key, {value})); + + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(Normalize(module)); + + EXPECT_EQ(sort->operand(0)->shape().element_type(), F32); + EXPECT_EQ(ShapeUtil::GetSubshape(sort->shape(), {0}).element_type(), F32); + EXPECT_NE(computation->root_instruction(), sort); + EXPECT_EQ(computation->root_instruction()->opcode(), HloOpcode::kTuple); +} + // Tests that the normalization should not cause unsupported mixed precision due // to resolving unsupported BF16 operand. 
TEST_F(BFloat16NormalizationTest, DoNotAddUnsupportedMixedPrecision) { diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index 2c2d1626c2c0d5d4b13e401dad9fd6c51514fc13..d5d6a044a81303425495202d8a98c6735b0b8b89 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -239,7 +239,7 @@ BufferAllocation::Slice BufferAllocation::GetSlice( void BufferAllocation::AddAssignment(const LogicalBuffer& buffer, int64 offset, int64 size) { - VLOG(4) << "Trying to add " << buffer << " to " << this; + VLOG(4) << "Trying to add " << buffer << " to allocation #" << index(); CHECK(assigned_buffers_.count(&buffer) == 0) << "LogicalBuffer " << buffer << " already assigned to allocation " << index_; @@ -784,21 +784,6 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation, } } - if (allow_input_output_aliasing_ && allocation->maybe_live_out()) { - const HloComputation* entry_computation = - assignment->module_->entry_computation(); - for (auto param : entry_computation->parameter_instructions()) { - for (auto& param_buffer : - assignment->points_to_analysis().GetBuffersDefinedByInstruction( - param)) { - if (assignment->liveness().MayInterfere(*param_buffer, buffer)) { - VLOG(4) << "Can't assign: Parameter interference with result"; - return false; - } - } - } - } - // If the buffer is live out of the computation then it should only be // assigned a buffer which exactly fits the result to avoid wasting memory // (result buffers can have arbitrary lifetimes). @@ -1434,13 +1419,28 @@ BufferAssigner::MergeColocatedBufferSets( // Builds sets of buffers in 'colocated_buffer_sets' which should be colocated // in the same allocation (currently just supports kWhile, kCall, and -// kConditional). +// kConditional and input output aliasing). 
void BufferAssigner::BuildColocatedBufferSets( const HloModule* module, const BufferLiveness& buffer_liveness, const LogicalBuffer::SizeFunction& buffer_size, std::vector* colocated_buffer_sets) { const TuplePointsToAnalysis& points_to_analysis = buffer_liveness.points_to_analysis(); + + // Set up colocated buffer set for input and output. + module->input_output_alias_config().ForEachAlias( + [&](const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index) { + std::vector colocated_set; + AddBufferToColocatedSet(module->entry_computation()->root_instruction(), + output_index, points_to_analysis, + &colocated_set); + AddBufferToColocatedSet( + module->entry_computation()->parameter_instruction(param_number), + param_index, points_to_analysis, &colocated_set); + AddSetToColocatedBufferSets(colocated_set, colocated_buffer_sets); + }); + for (const HloComputation* computation : module->MakeComputationPostOrder()) { if (computation->IsFusionComputation()) { continue; diff --git a/tensorflow/compiler/xla/service/buffer_value.h b/tensorflow/compiler/xla/service/buffer_value.h index 69b36463560a1fad4f62687e9014fb3fbe5bbd13..11d8abc5badf7b1a05239ed74a05be0c899e37a1 100644 --- a/tensorflow/compiler/xla/service/buffer_value.h +++ b/tensorflow/compiler/xla/service/buffer_value.h @@ -141,6 +141,9 @@ class BufferValue { // operator< is required for std::set. bool operator<(const BufferValue& other) const { return id_ < other.id_; } + bool operator==(const BufferValue& other) const { return id_ == other.id_; } + bool operator!=(const BufferValue& other) const { return id_ != other.id_; } + virtual string ToString() const = 0; // TODO(lauj) rename LogicalBufferProto to BufferValueProto. 
diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index 96bd2616f5607de888a096f8392ceb68490276e3..6d67f970020d278cc7bf61b56350200d3e5cb926 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -67,7 +67,7 @@ CompileOnlyService::CompileAheadOfTime( std::unique_ptr* metadata) { std::vector> hlo_modules; for (const AotXlaComputationInstance& instance : computations) { - TF_RET_CHECK(instance.computation.has_program_shape()); + TF_RET_CHECK(instance.computation.has_host_program_shape()); const DebugOptions& debug_options = options.debug_options(); @@ -86,9 +86,11 @@ CompileOnlyService::CompileAheadOfTime( Executable::DumpToDirectory(per_host_path, filename, hlo_snapshot)); } - const auto& program_shape = instance.computation.program_shape(); + const auto& program_shape = instance.computation.host_program_shape(); ExecutionOptions execution_options; *execution_options.mutable_debug_options() = debug_options; + *execution_options.mutable_shape_with_output_layout() = + *instance.result_layout; TF_ASSIGN_OR_RETURN( std::unique_ptr module_config, CreateModuleConfig(program_shape, instance.argument_layouts, @@ -101,8 +103,10 @@ CompileOnlyService::CompileAheadOfTime( hlo_modules.push_back(std::move(hlo_module)); } - return compiler_->CompileAheadOfTime(std::move(hlo_modules), options, - metadata); + return compiler_->CompileAheadOfTime( + absl::make_unique(hlo_modules[0]->name(), + absl::MakeSpan(hlo_modules)), + options, metadata); } } // namespace xla diff --git a/tensorflow/compiler/xla/service/compiler.cc b/tensorflow/compiler/xla/service/compiler.cc index 687ecafe0c308ecc22857fae650c6998677f605d..80c630c6201503d88a690f04a88f6fca6f3a438a 100644 --- a/tensorflow/compiler/xla/service/compiler.cc +++ b/tensorflow/compiler/xla/service/compiler.cc @@ -45,7 +45,7 @@ Compiler::ComputeDefaultBackendConfig(const 
HloInstruction& hlo, // Define a default version where metadata is not used. StatusOr>> Compiler::CompileAheadOfTime( - std::vector> modules, + std::unique_ptr module_group, const AotCompilationOptions& options, std::unique_ptr* metadata) { if (metadata != nullptr) { @@ -53,7 +53,7 @@ Compiler::CompileAheadOfTime( "Populating AotCompilationMetadata is not implemented on this " "compiler."); } - return CompileAheadOfTime(std::move(modules), options); + return CompileAheadOfTime(std::move(module_group), options); } /* static */ std::map* diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index 1fdda31c34a17a16f75e1efada542c2c2ea15038..9ab179303b3e792c1f94c08626d7bc1afd2099f8 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -32,6 +32,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" +#include "tensorflow/compiler/xla/service/hlo_module_group.h" #include "tensorflow/compiler/xla/service/logical_buffer.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" @@ -135,6 +136,12 @@ class Compiler { std::unique_ptr module, se::StreamExecutor* executor, DeviceMemoryAllocator* device_allocator) = 0; + // Optimizes a HLO module group, a set of module which runs concurrently on + // multiple devices potentially communicating data between the modules. + virtual Status RunHloPassesOnModuleGroup( + HloModuleGroup* module_group, se::StreamExecutor* executor, + DeviceMemoryAllocator* device_allocator) = 0; + // Compiles the HLO module for execution on a device given by the executor, // and returns an executable object or an error status. No HLO passes are // applied to module. 
Generally a module should be passed through RunHloPasses @@ -145,12 +152,18 @@ class Compiler { // (not just type of device) indicated by the executor. // // device_allocator is optional; see RunHloPasses. - // - // Use the overload below to compile computations that run in parallel. virtual StatusOr> RunBackend( std::unique_ptr module, se::StreamExecutor* executor, DeviceMemoryAllocator* device_allocator) = 0; + // Compiles a set of HLO modules that can run in parallel, potentially + // communicating data between the modules. + virtual StatusOr>> + RunBackendOnModuleGroup( + std::unique_ptr module_group, + std::vector> stream_exec, + DeviceMemoryAllocator* device_allocator) = 0; + // Compiles a set of HLO modules that can run in parallel, potentially // communicating data between the modules, and returns a corresponding // sequence of executable objects. @@ -160,7 +173,7 @@ class Compiler { // TODO(b/68666782): Remove this method after adding support for multiple // modules to RunHloPasses and RunBackends. virtual StatusOr>> Compile( - std::vector> modules, + std::unique_ptr module_group, std::vector> stream_exec, DeviceMemoryAllocator* device_allocator) = 0; @@ -184,16 +197,16 @@ class Compiler { ComputeDefaultBackendConfig(const HloInstruction& hlo, se::StreamExecutor* executor) const; - // Compiles the HLO module for ahead-of-time execution. This is intended for - // use in static compilation. + // Compiles the HLO module group for ahead-of-time execution. This is + // intended for use in static compilation. virtual StatusOr>> - CompileAheadOfTime(std::vector> modules, + CompileAheadOfTime(std::unique_ptr module_group, const AotCompilationOptions& options) = 0; // Similar to CompileAheadOfTime above but AotCompilationMetadata // has an argument that can be populated during compilation. 
virtual StatusOr>> - CompileAheadOfTime(std::vector> modules, + CompileAheadOfTime(std::unique_ptr module_group, const AotCompilationOptions& options, std::unique_ptr* metadata); diff --git a/tensorflow/compiler/xla/service/computation_layout.cc b/tensorflow/compiler/xla/service/computation_layout.cc index af8f7f1027a40703137d6880a9865449c560a47b..efc893818d03a20d6bd65b7dc1da72ea5da5ceb0 100644 --- a/tensorflow/compiler/xla/service/computation_layout.cc +++ b/tensorflow/compiler/xla/service/computation_layout.cc @@ -56,4 +56,14 @@ string ComputationLayout::ToString() const { result_layout_.ToString()); } +ProgramShape ComputationLayout::ComputeProgramShape() const { + ProgramShape program_shape; + for (int64 i = 0; i < parameter_layouts_.size(); ++i) { + *program_shape.add_parameters() = parameter_layouts_[i].shape(); + *program_shape.add_parameter_names() = absl::StrCat("p", i); + } + *program_shape.mutable_result() = result_layout_.shape(); + return program_shape; +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/computation_layout.h b/tensorflow/compiler/xla/service/computation_layout.h index 6975f387b4864bf28ea0ad23d7d4602b5b346e08..a2fb656677f354fbf85ff613d826cd6be86ba3bf 100644 --- a/tensorflow/compiler/xla/service/computation_layout.h +++ b/tensorflow/compiler/xla/service/computation_layout.h @@ -83,6 +83,10 @@ class ComputationLayout { // Returns a string representation of this object. string ToString() const; + // Create a ProgramShape proto based on the parameter and result shapes held + // within this object. 
+ ProgramShape ComputeProgramShape() const; + private: std::vector parameter_layouts_; ShapeLayout result_layout_; diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index f35324aa35370b592871749cba9fc2f66bea9219..245db6be2a400a7447f1e87317018cbb1572c405 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -40,10 +40,12 @@ namespace { using absl::StrAppend; -bool IsEntryParameterValue(const HloValue& value) { +bool IsReadonlyEntryParameterValue(const HloValue& value) { const HloComputation* computation = value.defining_instruction()->parent(); return value.defining_instruction()->opcode() == HloOpcode::kParameter && - computation == computation->parent()->entry_computation(); + computation == computation->parent()->entry_computation() && + !computation->parent()->input_output_alias_config().ParameterHasAlias( + value.defining_instruction()->parameter_number(), value.index()); } bool IsConstantValue(const HloValue& value) { @@ -51,7 +53,7 @@ bool IsConstantValue(const HloValue& value) { } bool ValueIsReadOnly(const HloValue& value) { - return IsConstantValue(value) || IsEntryParameterValue(value); + return IsConstantValue(value) || IsReadonlyEntryParameterValue(value); } // Data structure describing the action which should be taken on parts of a @@ -79,8 +81,7 @@ SpecialCaseCopyPolicy GetSpecialCaseCopyPolicy(const CallGraphNode& node, bool ShouldCopyRootValue(const HloValue& value, const SpecialCaseCopyPolicy& policy) { if (policy.copy_parameters_and_constants) { - return IsConstantValue(value) || - value.defining_instruction()->opcode() == HloOpcode::kParameter; + return ValueIsReadOnly(value); } return false; } @@ -332,6 +333,81 @@ Status AddCopiesForConditional(const HloAliasAnalysis& alias_analysis, return Status::OK(); } +// Conservatively adds copies before root instruction of entry computation and +// each aliased parameter to 
resolve interference of aliased input and output +// buffer. We later rely on the CopyRemover to drop the unnecessary ones. +Status AddCopiesForAliasedInputOutputs(HloModule* module) { + HloComputation* entry = module->entry_computation(); + HloInstruction* root = entry->root_instruction(); + + ShapeTree output_indices_to_copy(root->shape()); + std::vector> copied_parameters; + bool has_alias = false; + for (auto* param : entry->parameter_instructions()) { + bool param_has_alias = false; + ShapeTree param_indices_to_copy(param->shape()); + + module->input_output_alias_config().ForEachAlias( + [&](const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index) { + if (param_number == param->parameter_number()) { + param_has_alias = true; + *(param_indices_to_copy.mutable_element(param_index)) = true; + *(output_indices_to_copy.mutable_element(output_index)) = true; + } + }); + + if (!param_has_alias) { + continue; + } + + has_alias = true; + // Store a snapshot of users before DeepCopyInstruction, as + // DeepCopyInstruction introduces new users of the instruction. + std::vector users = param->users(); + ShapeTree param_copy_tree(param->shape(), + /*init_value=*/nullptr); + TF_ASSIGN_OR_RETURN(HloInstruction * copied, + entry->DeepCopyInstruction( + param, ¶m_indices_to_copy, ¶m_copy_tree)); + for (HloInstruction* user : users) { + TF_RETURN_IF_ERROR(param->ReplaceUseWith(user, copied)); + } + + copied_parameters.push_back(param_copy_tree); + } + + if (!has_alias) { + return Status::OK(); + } + + // Add copies before root instruction. + ShapeTree output_copy_tree(root->shape(), + /*init_value=*/nullptr); + + TF_ASSIGN_OR_RETURN(HloInstruction * root_copied, + root->parent()->DeepCopyInstruction( + root, &output_indices_to_copy, &output_copy_tree)); + + // Add control dependencies between the input/output copies. 
+ TF_RETURN_IF_ERROR(module->input_output_alias_config().ForEachAliasWithStatus( + [&](const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& input_index) -> Status { + HloInstruction* from = + copied_parameters[param_number].element(input_index); + HloInstruction* to = output_copy_tree.element(output_index); + + TF_RET_CHECK(from != nullptr); + TF_RET_CHECK(to != nullptr); + TF_RETURN_IF_ERROR(from->AddControlDependencyTo(to)); + return Status::OK(); + })); + + entry->set_root_instruction(root_copied); + + return Status::OK(); +} + // Removes any control dependencies to or from the given instruction. Status StripControlDependenciesFrom(HloInstruction* instruction) { while (!instruction->control_successors().empty()) { @@ -953,6 +1029,8 @@ Status CopyInsertion::AddCopiesToResolveInterference(HloModule* module) { } } } + + TF_RETURN_IF_ERROR(AddCopiesForAliasedInputOutputs(module)); return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/copy_insertion_test.cc b/tensorflow/compiler/xla/service/copy_insertion_test.cc index 892d0d7b547aaf1e7f1c55e4163d1e1fd9518def..4533ebb99bbba854a029fb8a9a1e31b023be720d 100644 --- a/tensorflow/compiler/xla/service/copy_insertion_test.cc +++ b/tensorflow/compiler/xla/service/copy_insertion_test.cc @@ -1351,6 +1351,218 @@ TEST_F(CopyInsertionTest, SwizzlingWhile) { EXPECT_THAT(xla_while->operand(0), op::Tuple(op::Copy(), op::Copy())); } +TEST_F(CopyInsertionTest, CrossingParameters) { + // Test a case where two parameters' dataflow cross with each other while + // input and output are aliased with same index: + // + // (p0 , p1) + // | \ /| + // | \ / | + // alias X alias + // | / \ | + // | / \| + // (p1 , p0) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "0")); + auto gte0 = 
builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + builder.AddInstruction(HloInstruction::CreateTuple({gte1, gte0})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 4); +} + +TEST_F(CopyInsertionTest, ParametersAliasing) { + // Test a case where two parameters' dataflow don't interfere with each other + // while aliased. + // + // (p0 , p1) + // | | + // | | + // alias alias + // | | + // | | + // (p0 , p1) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 0); +} + +TEST_F(CopyInsertionTest, ParameterWithNoAliasing) { + // Test a case where no parameter is aliased with result. 
In this case, copy + // should be added + // + // (p0 , p1) + // | | + // | | + // | | + // | | + // | | + // (p0 , p1) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); + module->AddEntryComputation(builder.Build()); + InsertCopies(module.get()); + + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Tuple(op::Copy(op::GetTupleElement(param, 0)), + op::Copy(op::GetTupleElement(param, 1)))); + + EXPECT_EQ(CountCopies(*module), 2); +} + +TEST_F(CopyInsertionTest, ParameterWithPartialAliasing) { + // Test a case where one parameter is aliased with result while another one + // isn't. 
+ // + // (p0 , p1) + // | | + // | | + // alias | + // | | + // | | + // (p0 , p1) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + InsertCopies(module.get()); + + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Tuple(op::GetTupleElement(param, 0), + op::Copy(op::GetTupleElement(param, 1)))); + + EXPECT_EQ(CountCopies(*module), 1); +} + +TEST_F(CopyInsertionTest, ParameterAndParallelOpsWithPartialAliasing) { + // Test a case where one parameter is aliased with result while another one + // isn't. 
+ // + // +-- (p0 , p1) + // | | | + // | | | + // alias Negate Negate + // | | | + // | | | + // +-- (p0 , p1) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + + auto negate0 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0)); + + auto negate1 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1)); + builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 0); +} + +TEST_F(CopyInsertionTest, ParameterAndOpsWithPartialAliasing) { + // Test a case where one parameter is aliased with result while another one + // isn't. 
+ // + // +-- (p0 , p1) + // | | | + // | | | + // alias Negate Negate + // | | | + // | Add----+ + // | | | + // +-- (p0 , p1) + auto module = CreateNewModule(); + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + + auto negate0 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0)); + + auto negate1 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1)); + + auto add = builder.AddInstruction(HloInstruction::CreateBinary( + scalar_shape_, HloOpcode::kAdd, negate0, negate1)); + builder.AddInstruction(HloInstruction::CreateTuple({add, negate1})); + module->AddEntryComputation(builder.Build()); + ASSERT_IS_OK(module->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + InsertCopies(module.get()); + + EXPECT_EQ(CountCopies(*module), 0); +} + TEST_F(CopyInsertionTest, SwizzlingWhileWithOneOp) { // Test a while instruction with a body which permutes its tuple parameter // elements and applies one operation to one of the elements. 
The addition of diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 68c715a086af2a53acd510d51479b29e2eeac632..da01c0caf2a6665f71cc087270b21fffdd6caa0d 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -676,9 +676,12 @@ StatusOr> CpuCompiler::RunBackend( } StatusOr>> -CpuCompiler::CompileAheadOfTime(std::vector> modules, +CpuCompiler::CompileAheadOfTime(std::unique_ptr module_group, const AotCompilationOptions& aot_options) { - TF_RET_CHECK(!modules.empty()); + TF_RET_CHECK(!module_group->empty()); + std::vector> modules = + module_group->ConsumeModules(); + std::call_once(llvm_command_line_options_initialized, &llvm_ir::InitializeLLVMCommandLineOptions, modules[0]->config()); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h index f2af923782df268e3e6da3895ec35579ab6aa51f..c67307548dda731f8fa56b8e6790e7e83f587113 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.h @@ -142,7 +142,7 @@ class CpuCompiler : public LLVMCompiler { DeviceMemoryAllocator* device_allocator) override; StatusOr>> - CompileAheadOfTime(std::vector> modules, + CompileAheadOfTime(std::unique_ptr module_group, const AotCompilationOptions& options) override; se::Platform::Id PlatformId() const override; diff --git a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc index be1208fb2df2a1a11a093810b5f6c2a83f468062..e6b6fcdf684eadb3702e490bbe24dbb7b3b52ec7 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_hlo_support_checker_test.cc @@ -57,10 +57,13 @@ TEST_F(CpuHloSupportCheckerTest, SparseUnimplemented) { HloInstruction::CreateParameter(1, sparse_shape, 
"param1")); builder.AddInstruction(HloInstruction::CreateBinary( sparse_shape, HloOpcode::kAdd, param0, param1)); - auto module = CreateNewModule(); + // Since verifier is reporting sparse layouts as errors, we should + // use a regular HloModule instead of VerifiedHloModule to avoid + // verifier errors being triggered in the destructor. + auto module = HloTestBase::CreateNewModule(); module->AddEntryComputation(builder.Build()); - Status status = checker().Run(module).status(); + Status status = checker().Run(module.get()).status(); ASSERT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED); EXPECT_THAT(status.error_message(), HasSubstr("CPU backend does not support")); diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index a70abb117acd2917e7273921e1919b0e03b6cd63..b2abdb39a598871a7cc44760e464f48b9a200874 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -688,8 +688,25 @@ StatusOr IrEmitter::EmitTargetElementLoopBodyForReduceWindow( for (size_t i = 0; i < index.size(); ++i) { llvm::Value* strided_index = NSWMul(index[i], b_.getInt64(window.dimensions(i).stride())); - input_index[i] = NSWSub(NSWAdd(strided_index, window_index[i]), - b_.getInt64(window.dimensions(i).padding_low())); + input_index[i] = NSWSub( + NSWAdd(strided_index, + NSWMul(window_index[i], + b_.getInt64(window.dimensions(i).window_dilation()))), + b_.getInt64(window.dimensions(i).padding_low())); + + // We need to verify that we are not in the dilated base area. + llvm::Value* dilation_condition = ICmpEQ( + SRem(input_index[i], b_.getInt64(window.dimensions(i).base_dilation())), + b_.getInt64(0)); + if (in_bounds_condition == nullptr) { + in_bounds_condition = dilation_condition; + } else { + in_bounds_condition = And(in_bounds_condition, dilation_condition); + } + + // Apply base dilation to the index. 
+ input_index[i] = + SDiv(input_index[i], b_.getInt64(window.dimensions(i).base_dilation())); // We need to check if 0 <= input_index[i] < bound, as otherwise we are in // the padding so that we can skip the computation. That is equivalent to @@ -728,12 +745,6 @@ Status IrEmitter::HandleReduceWindow(HloInstruction* reduce_window) { /*operands=*/{reduce_window->operand(0)}, /*supported_types=*/{F32, BF16, S32, F16})); - // TODO(b/31410564): Implement dilation for reduce-window. - if (window_util::HasDilation(reduce_window->window())) { - return Unimplemented( - "Dilation for ReduceWindow is not implemented on CPU."); - } - // Pseudo code for reduce window: // // for (coordinates O in the output) diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc b/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc index 3e7373adc5ab8a60fd18348ce2477175aaaa8fd4..c54f81e6915a286757e59821c2684a7271889816 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc @@ -50,7 +50,7 @@ void DfsHloVisitorBase::SetVisiting( const HloInstruction& instruction) { VLOG(3) << "marking HLO " << &instruction << " as visiting: "; DCHECK(NotVisited(instruction)); - visit_state_.SetState(instruction.unique_id(), VisitState::kVisiting); + visit_state_[instruction.unique_id()] = VisitState::kVisiting; } template @@ -58,7 +58,7 @@ void DfsHloVisitorBase::SetVisited( const HloInstruction& instruction) { VLOG(3) << "marking HLO " << &instruction << " as visited: "; DCHECK(NotVisited(instruction) || IsVisiting(instruction)); - visit_state_.SetState(instruction.unique_id(), VisitState::kVisited); + visit_state_[instruction.unique_id()] = VisitState::kVisited; } template diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h index 68d01d75a2ed3d7eaadb03a46ba3bd20f43a9ffc..4159aa281fa2b66d310d7c135f123a5a3bb83270 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h +++ 
b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "absl/container/flat_hash_map.h" #include "absl/strings/string_view.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/literal.h" @@ -263,21 +264,25 @@ class DfsHloVisitorBase { kVisited = 2, }; - VisitState GetVisitState(int id) { return visit_state_.GetState(id); } + VisitState GetVisitState(int id) { + auto iter = visit_state_.find(id); + if (iter == visit_state_.end()) { + return VisitState::kNotVisited; + } + return iter->second; + } VisitState GetVisitState(const HloInstruction& instruction); // Resize internal state if necessary to hold state for ids <= num. // This call is purely a performance hint and can be omitted without // affecting correctness. - void ReserveVisitStates(int num) { visit_state_.Reserve(num); } + void ReserveVisitStates(int num) { visit_state_.reserve(num); } // Useful when we want to visit the same computation more than once with the // same visitor. - void ResetVisitStates() { visit_state_.Reset(); } + void ResetVisitStates() { visit_state_.clear(); } - void SetVisitState(int id, VisitState state) { - visit_state_.SetState(id, state); - } + void SetVisitState(int id, VisitState state) { visit_state_[id] = state; } // Sets the visitation state of the given instruction as kVisiting. 
// @@ -326,44 +331,7 @@ class DfsHloVisitorBase { virtual Status Postprocess(HloInstructionPtr hlo); private: - class DFSVisitStates { - public: - DFSVisitStates() {} - void Reserve(uint64 num) { - states_.reserve((num + kStatesPerWord - 1) / kStatesPerWord); - } - VisitState GetState(uint64 id) { - uint64 word_index = id / kStatesPerWord; - if (word_index >= states_.size()) { - return VisitState::kNotVisited; - } - static_assert(static_cast(VisitState::kVisited) < 3, - "VisitState must fit in two bits"); - uint64 w = states_[word_index]; - uint32 shift = 2 * (id % kStatesPerWord); // 2 bits per state - return static_cast((w >> shift) & 0x3); - } - void SetState(uint64 id, VisitState state) { - uint64 word_index = id / kStatesPerWord; - if (word_index >= states_.size()) { - states_.resize(word_index + 1, 0); - } - uint64* w = &states_[word_index]; - uint32 shift = 2 * (id % kStatesPerWord); // 2 bits per state - uint64 mask = 0x3ull << shift; - *w = (*w & ~mask) | (static_cast(state) << shift); - DCHECK_EQ(GetState(id), state); - } - void Reset() { states_.clear(); } - - private: - static const uint32 kStatesPerWord = sizeof(uint64) / 2 /*bits per entry*/; - // Map from id to two-bit states. 
We store 32 such states per 64-bit - // value - std::vector states_; - }; - - DFSVisitStates visit_state_; + absl::flat_hash_map visit_state_; TF_DISALLOW_COPY_AND_ASSIGN(DfsHloVisitorBase); }; diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 522e9f5948da2206f144ede4fdd95350474146d9..4eb5739fe27d228c4d8939c429665f5d50a6e219 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -154,7 +154,7 @@ cc_library( deps = [ ":backend_configs", ":buffer_allocations", - ":cudnn_convolution_runner", + ":cudnn_conv_runner", ":elemental_ir_emitter", ":gpu_constants", ":gpu_executable", @@ -323,7 +323,7 @@ cc_library( ], deps = [ ":buffer_allocations", - ":cudnn_convolution_runner", + ":cudnn_conv_runner", ":hlo_execution_profiler", ":infeed_manager", ":ir_emission_utils", @@ -385,13 +385,13 @@ cc_library( ) cc_library( - name = "cudnn_convolution_algorithm_picker", - srcs = ["cudnn_convolution_algorithm_picker.cc"], - hdrs = ["cudnn_convolution_algorithm_picker.h"], + name = "cudnn_conv_algorithm_picker", + srcs = ["cudnn_conv_algorithm_picker.cc"], + hdrs = ["cudnn_conv_algorithm_picker.h"], deps = [ ":backend_configs", ":buffer_comparator", - ":cudnn_convolution_runner", + ":cudnn_conv_runner", ":gpu_executable", ":ir_emission_utils", "//tensorflow/compiler/xla:literal_util", @@ -404,14 +404,15 @@ cc_library( "//tensorflow/core:stream_executor_no_cuda", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", "@com_google_absl//absl/types:optional", ], ) cc_library( - name = "cudnn_convolution_runner", - srcs = ["cudnn_convolution_runner.cc"], - hdrs = ["cudnn_convolution_runner.h"], + name = "cudnn_conv_runner", + srcs = ["cudnn_conv_runner.cc"], + hdrs = ["cudnn_conv_runner.h"], deps = [ ":backend_configs", ":ir_emission_utils", @@ -431,9 +432,9 @@ cc_library( ) cc_library( - name = "cudnn_convolution_rewriter", - 
srcs = ["cudnn_convolution_rewriter.cc"], - hdrs = ["cudnn_convolution_rewriter.h"], + name = "cudnn_conv_rewriter", + srcs = ["cudnn_conv_rewriter.cc"], + hdrs = ["cudnn_conv_rewriter.h"], deps = [ ":backend_configs", ":ir_emission_utils", @@ -448,10 +449,10 @@ cc_library( ) tf_cc_test( - name = "cudnn_convolution_rewriter_test", - srcs = ["cudnn_convolution_rewriter_test.cc"], + name = "cudnn_conv_rewriter_test", + srcs = ["cudnn_conv_rewriter_test.cc"], deps = [ - ":cudnn_convolution_rewriter", + ":cudnn_conv_rewriter", ":ir_emission_utils", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla:test_helpers", @@ -580,9 +581,9 @@ tf_cc_test( ) cc_library( - name = "pad_insertion", - srcs = ["pad_insertion.cc"], - hdrs = ["pad_insertion.h"], + name = "cudnn_conv_padding_legalization", + srcs = ["cudnn_conv_padding_legalization.cc"], + hdrs = ["cudnn_conv_padding_legalization.h"], deps = [ ":ir_emission_utils", "//tensorflow/compiler/xla:literal", @@ -590,6 +591,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:window_util", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_casting_utils", "//tensorflow/compiler/xla/service:hlo_creation_utils", "//tensorflow/compiler/xla/service:hlo_pass", "//tensorflow/compiler/xla/service:shape_inference", @@ -598,9 +600,9 @@ cc_library( ) cc_library( - name = "pad_for_tensor_cores", - srcs = ["pad_for_tensor_cores.cc"], - hdrs = ["pad_for_tensor_cores.h"], + name = "cudnn_conv_pad_for_tensor_cores", + srcs = ["cudnn_conv_pad_for_tensor_cores.cc"], + hdrs = ["cudnn_conv_pad_for_tensor_cores.h"], deps = [ ":ir_emission_utils", "//tensorflow/compiler/xla:literal_util", @@ -612,11 +614,11 @@ cc_library( ) tf_cc_test( - name = "pad_for_tensor_cores_test", - srcs = ["pad_for_tensor_cores_test.cc"], + name = "cudnn_conv_pad_for_tensor_cores_test", + srcs = ["cudnn_conv_pad_for_tensor_cores_test.cc"], deps = [ + ":cudnn_conv_pad_for_tensor_cores", 
":ir_emission_utils", - ":pad_for_tensor_cores", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla/service:hlo_matchers", @@ -658,9 +660,11 @@ cc_library( srcs = ["nvptx_compiler.cc"], hdrs = ["nvptx_compiler.h"], deps = [ - ":cudnn_convolution_algorithm_picker", - ":cudnn_convolution_rewriter", - ":cudnn_fused_convolution_rewriter", + ":cudnn_conv_algorithm_picker", + ":cudnn_conv_pad_for_tensor_cores", + ":cudnn_conv_padding_legalization", + ":cudnn_conv_rewriter", + ":cudnn_fused_conv_rewriter", ":fusion_merger", ":gpu_constants", ":gpu_copy_insertion", @@ -672,8 +676,6 @@ cc_library( ":ir_emission_utils", ":ir_emitter", ":multi_output_fusion", - ":pad_for_tensor_cores", - ":pad_insertion", ":partition_assignment", ":stream_assignment", ":stream_executor_util", @@ -704,7 +706,6 @@ cc_library( "//tensorflow/compiler/xla/service:llvm_compiler", "//tensorflow/compiler/xla/service:reduce_precision_insertion", "//tensorflow/compiler/xla/service:reshape_mover", - "//tensorflow/compiler/xla/service:scatter_expander", "//tensorflow/compiler/xla/service:transpose_folding", "//tensorflow/compiler/xla/service:tuple_simplifier", "//tensorflow/compiler/xla/service:while_loop_constant_sinking", @@ -780,7 +781,6 @@ cc_library( srcs = ["gpu_layout_assignment.cc"], hdrs = ["gpu_layout_assignment.h"], deps = [ - ":gpu_options", ":ir_emission_utils", ":stream_executor_util", "//tensorflow/compiler/xla:shape_util", @@ -881,16 +881,6 @@ cc_library( ], ) -cc_library( - name = "gpu_options", - srcs = ["gpu_options.cc"], - hdrs = ["gpu_options.h"], - deps = [ - "//tensorflow/compiler/xla/service:hlo_module_config", - "//tensorflow/core:lib_internal", - ], -) - cc_library( name = "stream_executor_util", srcs = ["stream_executor_util.cc"], @@ -976,9 +966,9 @@ tf_cc_test( ) cc_library( - name = "cudnn_fused_convolution_rewriter", - srcs = ["cudnn_fused_convolution_rewriter.cc"], - hdrs = ["cudnn_fused_convolution_rewriter.h"], + name = 
"cudnn_fused_conv_rewriter", + srcs = ["cudnn_fused_conv_rewriter.cc"], + hdrs = ["cudnn_fused_conv_rewriter.h"], deps = [ ":backend_configs", ":ir_emission_utils", diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc index 4effea637d01bf23b54d341b77306b20b1b133c8..e1dffad3045808c4f316ccafdda39a174e1560c8 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.cc @@ -18,7 +18,7 @@ limitations under the License. #include #include "absl/strings/str_cat.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h" #include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/types.h" @@ -56,9 +56,9 @@ Status ConvolutionThunk::ExecuteOnStream( buffer_allocations.GetDeviceAddress(scratch_buffer_); auto op_profiler = profiler->MakeScopedInstructionProfiler(hlo_instruction()); - TF_RETURN_IF_ERROR(RunCudnnConvolution(cudnn_call_, - absl::MakeSpan(operand_se_buffers), - result_buffer, scratch, stream)); + TF_RETURN_IF_ERROR(RunCudnnConv(cudnn_call_, + absl::MakeSpan(operand_se_buffers), + result_buffer, scratch, stream)); void* ptrs[] = {result_buffer.opaque(), scratch.opaque()}; se::DeviceMemory tuple_addr( diff --git a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h index f53bc541983378819dba36489dd69c348f50af32..c71515490c94ef54baad9005509d1813de630159 100644 --- a/tensorflow/compiler/xla/service/gpu/convolution_thunk.h +++ b/tensorflow/compiler/xla/service/gpu/convolution_thunk.h @@ -19,7 +19,7 @@ limitations under the License. 
#include "absl/types/optional.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/gpu/buffer_allocations.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h" #include "tensorflow/compiler/xla/service/gpu/gpu_executable.h" #include "tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h" #include "tensorflow/compiler/xla/service/gpu/thunk.h" diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc similarity index 90% rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc index 7125673887d28729287d67577bcfa06423f85611..6d6780fa1c7b0c636eb771c40e74f074cd8c4c4b 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/types/optional.h" @@ -145,9 +145,8 @@ tensorflow::mutex_lock LockGpu(const se::StreamExecutor* stream_exec) { // cache misses and doing extra work. Overall, caching doesn't seem worth the // trouble, but we may want to revisit this if we ever find a model where // caching would speed up compilation a lot. 
-StatusOr> -CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( - HloCustomCallInstruction* instr) { +StatusOr +CudnnConvAlgorithmPicker::PickBestAlgorithm(HloCustomCallInstruction* instr) { // TODO(timshen): for now only check fp16. It can be expanded to other types, // with some work on the HLO routines. const bool cross_check_enabled = @@ -253,10 +252,10 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( backend_config.set_algorithm(alg.algo_id()); backend_config.set_tensor_ops_enabled(alg.tensor_ops_enabled()); TF_RETURN_IF_ERROR(instr->set_backend_config(backend_config)); - bool launch_ok = RunCudnnConvolution(instr, absl::MakeSpan(operand_buffers), - result_buffer, &scratch_allocator, - &stream, &profile_result) - .ok(); + bool launch_ok = + RunCudnnConv(instr, absl::MakeSpan(operand_buffers), result_buffer, + &scratch_allocator, &stream, &profile_result) + .ok(); if (launch_ok && profile_result.is_valid()) { const bool crash_on_checking_failure = @@ -316,9 +315,10 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( << AlgorithmToString(best_result.algorithm()) << ", takes " << best_result.elapsed_time_in_ms() << "ms, and uses " << best_result_bytes_used << "B of scratch memory."; - return std::make_tuple(best_result.algorithm().algo_id(), - best_result.algorithm().tensor_ops_enabled(), - best_result_bytes_used); + return AutotuneResult{best_result.algorithm().algo_id(), + best_result.algorithm().tensor_ops_enabled(), + best_result_bytes_used, + absl::Milliseconds(best_result.elapsed_time_in_ms())}; } return InternalError( @@ -327,45 +327,41 @@ CudnnConvolutionAlgorithmPicker::PickBestAlgorithm( instr->ToString()); } -StatusOr CudnnConvolutionAlgorithmPicker::RunOnInstruction( +StatusOr CudnnConvAlgorithmPicker::RunOnInstruction( HloInstruction* instr) { CHECK(IsCustomCallToDnnConvolution(*instr)); - StatusOr> alg_scratch_and_tc = + StatusOr best_algo_or = PickBestAlgorithm(Cast(instr)); - - if (!alg_scratch_and_tc.ok()) { - LOG(ERROR) << 
alg_scratch_and_tc.status(); + if (!best_algo_or.ok()) { + LOG(ERROR) << best_algo_or.status(); return false; } - int64 algorithm; - bool tensor_ops_enabled; - int64 scratch_bytes; - - std::tie(algorithm, tensor_ops_enabled, scratch_bytes) = - alg_scratch_and_tc.ConsumeValueOrDie(); - - VLOG(1) << "Setting cudnn conv to use algorithm " << algorithm << " and " - << NumBytesToString(scratch_bytes) + auto best_algo = std::move(best_algo_or).ValueOrDie(); + VLOG(1) << "Setting cudnn conv to use algorithm " << best_algo.algorithm + << " and " << NumBytesToString(best_algo.scratch_bytes) << " of scratch memory: " << instr->ToString() - << " tensor_ops_enabled: " << tensor_ops_enabled; + << " tensor_ops_enabled: " << best_algo.tensor_ops_enabled; // Replace instr with a new CustomCall which has the correct algorithm, and // whose output shape has the appropriate amount of scratch memory. HloComputation* computation = instr->parent(); - Shape new_call_shape = - ShapeUtil::MakeTupleShape({instr->shape().tuple_shapes(0), - ShapeUtil::MakeShape(U8, {scratch_bytes})}); + Shape new_call_shape = ShapeUtil::MakeTupleShape( + {instr->shape().tuple_shapes(0), + ShapeUtil::MakeShape(U8, {best_algo.scratch_bytes})}); TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig backend_config, instr->backend_config()); - backend_config.set_algorithm(algorithm); - backend_config.set_tensor_ops_enabled(tensor_ops_enabled); + backend_config.set_algorithm(best_algo.algorithm); + backend_config.set_tensor_ops_enabled(best_algo.tensor_ops_enabled); HloInstruction* new_call = computation->AddInstruction( instr->CloneWithNewOperands(new_call_shape, instr->operands())); + VLOG(1) << "Replacing convolution " << instr->ToString() << " with " + << new_call->ToString(); + TF_RETURN_IF_ERROR(new_call->set_backend_config(backend_config)); // Repackage new_call so it has the same shape as the original call, namely @@ -381,7 +377,7 @@ StatusOr CudnnConvolutionAlgorithmPicker::RunOnInstruction( return true; } -StatusOr 
CudnnConvolutionAlgorithmPicker::RunOnComputation( +StatusOr CudnnConvAlgorithmPicker::RunOnComputation( HloComputation* computation) { std::vector convs; for (auto* instr : computation->instructions()) { @@ -398,7 +394,7 @@ StatusOr CudnnConvolutionAlgorithmPicker::RunOnComputation( return changed; } -StatusOr CudnnConvolutionAlgorithmPicker::Run(HloModule* module) { +StatusOr CudnnConvAlgorithmPicker::Run(HloModule* module) { bool changed = false; for (HloComputation* computation : module->MakeNonfusionComputations()) { TF_ASSIGN_OR_RETURN(bool result, RunOnComputation(computation)); diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h similarity index 71% rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h index aeda2fc7f8b4d6169fc2baa8975119ba7bf68dd2..642af787afc71586d722ecc7e529ed8b3fa64d33 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h @@ -13,13 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_ALGORITHM_PICKER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_ALGORITHM_PICKER_H_ +#include "absl/time/time.h" #include "absl/types/optional.h" #include "tensorflow/compiler/xla/service/compiler.h" #include "tensorflow/compiler/xla/service/device_memory_allocator.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h" #include "tensorflow/compiler/xla/service/hlo_instructions.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" @@ -30,27 +31,32 @@ namespace gpu { // Modifies CustomCalls to cudnn convolutions, choosing the best algorithm for // each and adding explicit scratch space to the CustomCalls. -class CudnnConvolutionAlgorithmPicker : public HloModulePass { +class CudnnConvAlgorithmPicker : public HloModulePass { public: // If the `allocator` parameter is not null, we will use it to allocate temp // memory while timing the various convolution algorithms. If it's null, // we'll use the default allocator on the StreamExecutor. 
- CudnnConvolutionAlgorithmPicker(se::StreamExecutor* stream_exec, - DeviceMemoryAllocator* allocator, - Compiler* compiler) + CudnnConvAlgorithmPicker(se::StreamExecutor* stream_exec, + DeviceMemoryAllocator* allocator, Compiler* compiler) : stream_exec_(stream_exec), allocator_(allocator), compiler_(compiler) {} absl::string_view name() const override { - return "cudnn-convolution-algorithm-picker"; + return "cudnn-conv-algorithm-picker"; } StatusOr Run(HloModule* module) override; private: + struct AutotuneResult { + int64 algorithm; + bool tensor_ops_enabled; + int64 scratch_bytes; + absl::Duration runtime; + }; + StatusOr RunOnComputation(HloComputation* computation); StatusOr RunOnInstruction(HloInstruction* instr); - StatusOr> PickBestAlgorithm( - HloCustomCallInstruction* instr); + StatusOr PickBestAlgorithm(HloCustomCallInstruction* instr); se::StreamExecutor* stream_exec_; // never null DeviceMemoryAllocator* allocator_; // may be null @@ -60,4 +66,4 @@ class CudnnConvolutionAlgorithmPicker : public HloModulePass { } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_ALGORITHM_PICKER_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_ALGORITHM_PICKER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.cc similarity index 51% rename from tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.cc index e3869b5c368957571219a39600214140022a7318..5aa4f839f4be5f1060480fea98775f8ffada0bdd 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" @@ -24,50 +24,17 @@ limitations under the License. namespace xla { namespace gpu { -// We want the input/output feature counts of an f16 conv to be factors of 8, -// because without this cudnn can't use tensor cores on the conv. -static constexpr int64 kDesiredNumFeaturesFactor = 8; - // We won't pad a conv if doing so increases the total number of bytes in the // lhs, rhs, or result by more than this amount. // // TODO(jlebar): This number was tuned experimentally. It represents a // compromise on our current benchmarks; it speeds some up significantly, and // doesn't slow any down. But we can observe by changing this value that -// there's additional room for speedups. Achieving those speedups without also -// slowing other things down will likely require a more sophisticated heuristic, -// possibly some form of auto-tuning. -// -// This value should be >= 4/3, otherwise the "dims of size 3 padded up to 4" -// special case inside PadShape won't fire. +// there's additional room for speedups. Achieving those speedups without +// also slowing other things down will likely require a more sophisticated +// heuristic, possibly some form of auto-tuning. static constexpr double kMaxBytesTouchedIncrease = 1.35; -// Pads the given dimensions in the given shape up to a multiple of -// kDesiredNumFeaturesFactor. -static Shape PadShape(Shape s, absl::Span dims) { - for (int64 dim : dims) { - int64 dim_to_pad_size = s.dimensions(dim); - - // Round dim_to_pad_size up to the next multiple of - // kDesiredNumFeaturesFactor. - // - // Special case: dims of size 3 are rounded up to 4, not - // kDesiredNumFeaturesFactor. 
Empirically (and on the advice of nvidia), - // this helps, but as of writing, it's not supported by anything in the - // cudnn docs. - int64 new_dim_to_pad_size; - if (dim_to_pad_size == 3) { - new_dim_to_pad_size = 4; - } else { - new_dim_to_pad_size = - RoundUpToNearest(dim_to_pad_size, kDesiredNumFeaturesFactor); - } - - s.set_dimensions(dim, new_dim_to_pad_size); - } - return s; -} - // Creates and returns an HLO that zero-pads one or more dimensions in the given // instruction so that its shape is equal to the given shape. // @@ -103,90 +70,19 @@ static HloInstruction* PadInstruction(HloInstruction* instr, HloInstruction::CreatePad(new_shape, instr, zero, pad_config)); } -// Pads the input/output feature dimensions of the given cudnn convolution -// custom-call to be multiples of kDesiredNumFeaturesFactor. -static StatusOr PadFeaturesDims(HloInstruction* conv) { +// Modifies the given convolution to have the given LHS/RHS/result shapes. +static Status PadConv(HloCustomCallInstruction* conv, + const Shape& new_lhs_shape, const Shape& new_rhs_shape, + const Shape& new_result_shape) { CHECK_EQ(0, conv->shape().tuple_shapes(1).dimensions(0)) << "conv must use 0 scratch bytes, i.e. this pass must be run " - "before CudnnConvolutionAlgorithmPicker."; + "before CudnnConvAlgorithmPicker."; - const auto& target = conv->custom_call_target(); - const auto& dnums = conv->convolution_dimension_numbers(); auto* lhs = conv->mutable_operand(0); auto* rhs = conv->mutable_operand(1); - const Shape& result_shape = conv->shape().tuple_shapes(0); - - Shape new_lhs_shape = [&] { - if (target == kCudnnConvForwardCallTarget || - target == kCudnnConvBackwardFilterCallTarget) { - // LHS is "input". - return PadShape(lhs->shape(), {dnums.input_feature_dimension()}); - } - CHECK_EQ(target, kCudnnConvBackwardInputCallTarget); - // LHS is "output". 
- return PadShape(lhs->shape(), {dnums.output_feature_dimension()}); - }(); - - Shape new_rhs_shape = [&] { - if (target == kCudnnConvForwardCallTarget || - target == kCudnnConvBackwardInputCallTarget) { - // RHS is "filter". - return PadShape(rhs->shape(), {dnums.kernel_input_feature_dimension(), - dnums.kernel_output_feature_dimension()}); - } - CHECK_EQ(target, kCudnnConvBackwardFilterCallTarget); - // RHS is "output". - return PadShape(rhs->shape(), {dnums.output_feature_dimension()}); - }(); - - if (ShapeUtil::Equal(lhs->shape(), new_lhs_shape) && - ShapeUtil::Equal(rhs->shape(), new_rhs_shape)) { - VLOG(3) << "No need to pad features of " << conv->ToString(); - return false; - } - - Shape new_result_shape = [&] { - if (target == kCudnnConvForwardCallTarget) { - // Result is "output". - return PadShape(result_shape, {dnums.output_feature_dimension()}); - } - if (target == kCudnnConvBackwardInputCallTarget) { - // Result is "input". - return PadShape(result_shape, {dnums.input_feature_dimension()}); - } - CHECK_EQ(target, kCudnnConvBackwardFilterCallTarget); - // Result is "filter". - return PadShape(result_shape, {dnums.kernel_input_feature_dimension(), - dnums.kernel_output_feature_dimension()}); - }(); - - // Check that padding wouldn't increase the total bytes read/written by this - // operation too much. 
- auto check_size_increase = [&](const Shape& old_shape, - const Shape& new_shape) { - int64 old_bytes = ShapeUtil::ByteSizeOf(old_shape); - int64 new_bytes = ShapeUtil::ByteSizeOf(new_shape); - if (new_bytes <= old_bytes * kMaxBytesTouchedIncrease) { - return true; - } - VLOG(3) << "Not padding convolution; doing so would change input / result " - "shape from " - << ShapeUtil::HumanString(old_shape) << " to " - << ShapeUtil::HumanString(new_shape) << ", a size increase of " - << new_bytes / static_cast(old_bytes) << "x > " - << kMaxBytesTouchedIncrease << "x: " << conv->ToString(); - return false; - }; - if (!check_size_increase(lhs->shape(), new_lhs_shape) || - !check_size_increase(rhs->shape(), new_rhs_shape) || - !check_size_increase(result_shape, new_result_shape)) { - return false; - } - - // OK, let's do the transformation! - auto* new_lhs = PadInstruction(lhs, new_lhs_shape); auto* new_rhs = PadInstruction(rhs, new_rhs_shape); + const Shape& result_shape = conv->shape().tuple_shapes(0); CHECK(new_lhs != lhs || new_rhs != rhs) << "We should have had to pad either LHS or RHS."; @@ -219,30 +115,124 @@ static StatusOr PadFeaturesDims(HloInstruction* conv) { VLOG(2) << "Padded features of " << conv->ToString() << ", replaced with " << new_conv->ToString(); - TF_RETURN_IF_ERROR(conv->parent()->ReplaceInstruction(conv, new_conv)); + return conv->parent()->ReplaceInstruction(conv, new_conv); +} + +static StatusOr PadForTensorCores(HloCustomCallInstruction* conv) { + TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(conv)); + const auto& dnums = conv->convolution_dimension_numbers(); + auto* lhs = conv->mutable_operand(0); + auto* rhs = conv->mutable_operand(1); + const Shape& result_shape = conv->shape().tuple_shapes(0); + + // Nothing to do on non-f16 convolutions. + if (result_shape.element_type() != PrimitiveType::F16) { + return false; + } + + // TODO(timshen): Don't skip forward-activation convs if we find a benchmark + // where there's a speedup. 
+ if (kind == CudnnConvKind::kForwardActivation) { + return false; + } + + Shape new_lhs_shape = lhs->shape(); + Shape new_rhs_shape = rhs->shape(); + Shape new_result_shape = conv->shape().tuple_shapes(0); + + // new_{input,filter_output}_shape points to the appropriate one of + // new_{lhs,rhs,result}_shape. + Shape* new_input_shape; + Shape* new_filter_shape; + Shape* new_output_shape; + std::tie(new_input_shape, new_filter_shape, new_output_shape) = [&] { + switch (kind) { + case CudnnConvKind::kForward: + case CudnnConvKind::kForwardActivation: + return std::make_tuple(&new_lhs_shape, &new_rhs_shape, + &new_result_shape); + case CudnnConvKind::kBackwardInput: + return std::make_tuple(&new_result_shape, &new_rhs_shape, + &new_lhs_shape); + case CudnnConvKind::kBackwardFilter: + return std::make_tuple(&new_lhs_shape, &new_result_shape, + &new_rhs_shape); + } + }(); + + // If there are 3 input features and 32 or 64 output features, pad the input + // features to 4. Otherwise, try padding to multiples of 8 and check that + // this doesn't make any of the conv buffers too much larger. 
+ auto input_features = + new_input_shape->dimensions(dnums.input_feature_dimension()); + auto output_features = + new_output_shape->dimensions(dnums.output_feature_dimension()); + if (input_features == 3 && (output_features == 32 || output_features == 64)) { + new_input_shape->set_dimensions(dnums.input_feature_dimension(), 4); + new_filter_shape->set_dimensions(dnums.kernel_input_feature_dimension(), 4); + } else { + auto pad_dim = [](Shape* s, int64 dim) { + s->set_dimensions(dim, RoundUpToNearest(s->dimensions(dim), 8)); + }; + pad_dim(new_input_shape, dnums.input_feature_dimension()); + pad_dim(new_filter_shape, dnums.kernel_input_feature_dimension()); + pad_dim(new_filter_shape, dnums.kernel_output_feature_dimension()); + pad_dim(new_output_shape, dnums.output_feature_dimension()); + + // Check that padding wouldn't increase the total bytes read/written by this + // operation too much. + auto check_size_increase = [&](const Shape& old_shape, + const Shape& new_shape) { + int64 old_bytes = ShapeUtil::ByteSizeOf(old_shape); + int64 new_bytes = ShapeUtil::ByteSizeOf(new_shape); + if (new_bytes <= old_bytes * kMaxBytesTouchedIncrease) { + return true; + } + VLOG(3) + << "Not padding convolution; doing so would change input / result " + "shape from " + << ShapeUtil::HumanString(old_shape) << " to " + << ShapeUtil::HumanString(new_shape) << ", a size increase of " + << new_bytes / static_cast(old_bytes) << "x > " + << kMaxBytesTouchedIncrease << "x: " << conv->ToString(); + return false; + }; + + if (!check_size_increase(lhs->shape(), new_lhs_shape) || + !check_size_increase(rhs->shape(), new_rhs_shape) || + !check_size_increase(result_shape, new_result_shape)) { + return false; + } + } + + if (ShapeUtil::Equal(lhs->shape(), new_lhs_shape) && + ShapeUtil::Equal(rhs->shape(), new_rhs_shape)) { + VLOG(3) << "No need to pad features of " << conv->ToString(); + return false; + } + + // OK, let's do the transformation! 
+ TF_RETURN_IF_ERROR( + PadConv(conv, new_lhs_shape, new_rhs_shape, new_result_shape)); return true; } -static std::vector GetRelevantConvs(HloComputation* comp) { - std::vector convs; +static std::vector GetRelevantConvs( + HloComputation* comp) { + std::vector convs; for (HloInstruction* instr : comp->instructions()) { - if (IsCustomCallToDnnConvolution(*instr) && - instr->operand(0)->shape().element_type() == F16 && - // TODO(timshen): Disable for fused conv for now. Implement it if it's - // needed. - Cast(instr)->custom_call_target() != - kCudnnConvBiasActivationForwardCallTarget) { - convs.push_back(instr); + if (IsCustomCallToDnnConvolution(*instr)) { + convs.push_back(Cast(instr)); } } return convs; } -StatusOr PadForTensorCores::Run(HloModule* module) { +StatusOr CudnnConvPadForTensorCores::Run(HloModule* module) { bool changed = false; for (HloComputation* comp : module->MakeNonfusionComputations()) { - for (HloInstruction* conv : GetRelevantConvs(comp)) { - TF_ASSIGN_OR_RETURN(bool result, PadFeaturesDims(conv)); + for (HloCustomCallInstruction* conv : GetRelevantConvs(comp)) { + TF_ASSIGN_OR_RETURN(bool result, PadForTensorCores(conv)); changed |= result; } } diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h similarity index 51% rename from tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h index e592a3774ec28605fda912298c74ca7976ff99ac..d4e51e86c1bf2c1f9aef2eed642604092033a538 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h @@ -13,26 +13,30 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_FOR_TENSOR_CORES_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_FOR_TENSOR_CORES_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_TENSOR_CORES_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_TENSOR_CORES_H_ #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" namespace xla { namespace gpu { -// Ensures that f16 cudnn convolutions have input/output channel dimensions that -// are multiples of 8, inserting pads/slices as necessary. +// Adds padding to cudnn convolutions to make them run faster on GPUs with +// tensor cores. // -// This is useful primarily for Volta and newer GPUs, where tensor cores can -// only be used if the channel dims are multiples of 8. It's probably the -// opposite of useful on other GPUs, so you should check what GPU you're -// targeting before running this pass. +// - f16 convolutions are padded to have input/output channel dimensions that +// are multiples of 8, so that we can use tensor cores. +// +// - f16 convolutions with 3 input channels and 32 or 64 output channels are +// padded to 4 input channels. There's a special-cased cudnn algorithm just +// for this. +// +// Don't run this pass on GPUs without tensor cores -- it will make them slower! // // TODO(jlebar): Also pad dots. 
-class PadForTensorCores : public HloModulePass { +class CudnnConvPadForTensorCores : public HloModulePass { public: - absl::string_view name() const override { return "pad for tensor cores"; } + absl::string_view name() const override { return "cudnn-conv-pad-for-speed"; } StatusOr Run(HloModule* module) override; }; @@ -40,4 +44,4 @@ class PadForTensorCores : public HloModulePass { } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_FOR_TENSOR_CORES_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PAD_FOR_TENSOR_CORES_H_ diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc similarity index 63% rename from tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores_test.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc index 5c92b0dcb873b873074704dca8f27d4067b070df..fa3afa6a5d318c399dc38e8934199b5a1393669e 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores_test.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/hlo_matchers.h" @@ -29,10 +29,10 @@ namespace { namespace op = xla::testing::opcode_matchers; using ::testing::_; -class PadForTensorCoresTest : public HloVerifiedTestBase {}; +class CudnnConvPadForTensorCoresTest : public HloVerifiedTestBase {}; -TEST_F(PadForTensorCoresTest, PadF16ForwardConvInputChannels) { - ParseAndVerifyModule(R"( +TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvInputChannels) { + auto module = ParseAndReturnVerifiedModule(R"( HloModule TestModule ENTRY TestComputation { @@ -41,11 +41,12 @@ TEST_F(PadForTensorCoresTest, PadF16ForwardConvInputChannels) { ROOT result = (f16[10,20,30,40], u8[0]) custom-call(input, filter), window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convForward" - })"); - EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie()); - auto* root = module().entry_computation()->root_instruction(); + })") + .ValueOrDie(); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie()); + auto* root = module->entry_computation()->root_instruction(); - SCOPED_TRACE(module().ToString()); + SCOPED_TRACE(module->ToString()); EXPECT_THAT(root, op::CustomCall(kCudnnConvForwardCallTarget, op::Pad(op::Parameter(0), _), op::Pad(op::Parameter(1), _))); @@ -55,8 +56,8 @@ TEST_F(PadForTensorCoresTest, PadF16ForwardConvInputChannels) { ShapeUtil::MakeShape(F16, {2, 2, 48, 40}))); } -TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) { - ParseAndVerifyModule(R"( +TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) { + auto module = ParseAndReturnVerifiedModule(R"( HloModule TestModule ENTRY TestComputation { @@ -65,9 
+66,10 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) { ROOT result = (f16[10,20,30,40], u8[0]) custom-call(output, filter), window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convBackwardInput" - })"); - EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie()); - auto* root = module().entry_computation()->root_instruction(); + })") + .ValueOrDie(); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie()); + auto* root = module->entry_computation()->root_instruction(); EXPECT_THAT(root, op::CustomCall(kCudnnConvBackwardInputCallTarget, op::Pad(op::Parameter(0), _), op::Pad(op::Parameter(1), _))); @@ -77,8 +79,8 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvOutputChannels) { ShapeUtil::MakeShape(F16, {2, 2, 40, 48}))); } -TEST_F(PadForTensorCoresTest, PadF16ForwardConvOutputChannels) { - ParseAndVerifyModule(R"( +TEST_F(CudnnConvPadForTensorCoresTest, PadF16ForwardConvOutputChannels) { + auto module = ParseAndReturnVerifiedModule(R"( HloModule TestModule ENTRY TestComputation { @@ -87,17 +89,18 @@ TEST_F(PadForTensorCoresTest, PadF16ForwardConvOutputChannels) { ROOT result = (f16[10,20,30,41], u8[0]) custom-call(input, filter), window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convForward" - })"); - EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie()); - auto* root = module().entry_computation()->root_instruction(); + })") + .ValueOrDie(); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie()); + auto* root = module->entry_computation()->root_instruction(); EXPECT_THAT(root, op::Tuple(op::Slice(op::GetTupleElement(op::CustomCall( kCudnnConvForwardCallTarget, op::Parameter(0), op::Pad(op::Parameter(1), _)))), _)); } -TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvInputChannels) { - ParseAndVerifyModule(R"( +TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardInputConvInputChannels) { + auto module = 
ParseAndReturnVerifiedModule(R"( HloModule TestModule ENTRY TestComputation { @@ -107,9 +110,10 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvInputChannels) { window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convBackwardInput" ROOT gte = f16[10,20,30,41] get-tuple-element(result), index=0 - })"); - EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie()); - auto* root = module().entry_computation()->root_instruction(); + })") + .ValueOrDie(); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie()); + auto* root = module->entry_computation()->root_instruction(); EXPECT_THAT(root, op::GetTupleElement(op::Tuple( op::Slice(op::GetTupleElement(op::CustomCall( kCudnnConvBackwardInputCallTarget, op::Parameter(0), @@ -117,8 +121,8 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardInputConvInputChannels) { _))); } -TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) { - ParseAndVerifyModule(R"( +TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) { + auto module = ParseAndReturnVerifiedModule(R"( HloModule TestModule ENTRY TestComputation { @@ -128,9 +132,10 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) { window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convBackwardFilter" ROOT gte = f16[2,2,41,40] get-tuple-element(result), index=0 - })"); - EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie()); - auto* root = module().entry_computation()->root_instruction(); + })") + .ValueOrDie(); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie()); + auto* root = module->entry_computation()->root_instruction(); EXPECT_THAT(root, op::GetTupleElement(op::Tuple( op::Slice(op::GetTupleElement(op::CustomCall( kCudnnConvBackwardFilterCallTarget, @@ -138,8 +143,8 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvInputChannels) { _))); } -TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) { 
- ParseAndVerifyModule(R"( +TEST_F(CudnnConvPadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) { + auto module = ParseAndReturnVerifiedModule(R"( HloModule TestModule ENTRY TestComputation { @@ -149,9 +154,10 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) { window={size=2x2}, dim_labels=b01f_01io->b01f, custom_call_target="__cudnn$convBackwardFilter" ROOT gte = f16[2,2,40,41] get-tuple-element(result), index=0 - })"); - EXPECT_TRUE(PadForTensorCores().Run(&module()).ValueOrDie()); - auto* root = module().entry_computation()->root_instruction(); + })") + .ValueOrDie(); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie()); + auto* root = module->entry_computation()->root_instruction(); EXPECT_THAT(root, op::GetTupleElement(op::Tuple( op::Slice(op::GetTupleElement(op::CustomCall( kCudnnConvBackwardFilterCallTarget, @@ -159,6 +165,31 @@ TEST_F(PadForTensorCoresTest, PadF16BackwardFilterConvOutputChannels) { _))); } +TEST_F(CudnnConvPadForTensorCoresTest, PadInputFeatures3To4) { + auto module = ParseAndReturnVerifiedModule(R"( + HloModule TestModule + + ENTRY TestComputation { + input = f16[10,20,30,3] parameter(0) + filter = f16[2,2,3,32] parameter(1) + ROOT result = (f16[10,20,30,32], u8[0]) custom-call(input, filter), + window={size=2x2}, dim_labels=b01f_01io->b01f, + custom_call_target="__cudnn$convForward" + })") + .ValueOrDie(); + EXPECT_TRUE(CudnnConvPadForTensorCores().Run(module.get()).ValueOrDie()); + auto* root = module->entry_computation()->root_instruction(); + + SCOPED_TRACE(module->ToString()); + EXPECT_THAT(root, op::CustomCall(kCudnnConvForwardCallTarget, + op::Pad(op::Parameter(0), _), + op::Pad(op::Parameter(1), _))); + EXPECT_TRUE(ShapeUtil::Equal(root->operand(0)->shape(), + ShapeUtil::MakeShape(F16, {10, 20, 30, 4}))); + EXPECT_TRUE(ShapeUtil::Equal(root->operand(1)->shape(), + ShapeUtil::MakeShape(F16, {2, 2, 4, 32}))); +} + } // anonymous namespace } // namespace gpu } // namespace xla 
diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.cc similarity index 93% rename from tensorflow/compiler/xla/service/gpu/pad_insertion.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.cc index b42a19e3a2200e917f8040be183b8d79c9e4e161..d7829045cc127deaa4c2c9b705dca5285d704af2 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.cc @@ -13,12 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/pad_insertion.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h" #include "absl/memory/memory.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" +#include "tensorflow/compiler/xla/service/hlo_casting_utils.h" #include "tensorflow/compiler/xla/service/hlo_creation_utils.h" #include "tensorflow/compiler/xla/service/shape_inference.h" #include "tensorflow/compiler/xla/util.h" @@ -131,7 +132,8 @@ HloInstruction* MaybePaddedKernel(const Window& conv_window, } } // namespace -bool PadInsertion::CanonicalizeForwardConvolution(HloInstruction* conv) { +bool CudnnConvPaddingLegalization::CanonicalizeForwardConvolution( + HloInstruction* conv) { if (IsForwardConvolutionCanonical(*conv)) { return false; } @@ -186,7 +188,7 @@ void IncreasePaddingHighBy(int64 delta, WindowDimension* window_dim) { } } // namespace -bool PadInsertion::CanonicalizeBackwardFilterConvolution( +bool CudnnConvPaddingLegalization::CanonicalizeBackwardFilterConvolution( HloInstruction* backward_conv) { CHECK_EQ(backward_conv->custom_call_target(), kCudnnConvBackwardFilterCallTarget); @@ 
-259,7 +261,7 @@ bool PadInsertion::CanonicalizeBackwardFilterConvolution( return true; } -bool PadInsertion::CanonicalizeBackwardInputConvolution( +bool CudnnConvPaddingLegalization::CanonicalizeBackwardInputConvolution( HloInstruction* backward_conv) { if (window_util::HasSymmetricPadding(backward_conv->window())) { return false; @@ -376,32 +378,33 @@ bool PadInsertion::CanonicalizeBackwardInputConvolution( return true; } -StatusOr PadInsertion::RunOnComputation(HloComputation* computation) { +StatusOr CudnnConvPaddingLegalization::RunOnComputation( + HloComputation* computation) { bool changed = false; - std::vector convs; + std::vector convs; for (auto* instr : computation->instructions()) { if (IsCustomCallToDnnConvolution(*instr)) { - convs.push_back(instr); + convs.push_back(Cast(instr)); } } - for (HloInstruction* instruction : convs) { - const auto& target = instruction->custom_call_target(); - if (target == kCudnnConvForwardCallTarget || - target == kCudnnConvBiasActivationForwardCallTarget) { - changed |= CanonicalizeForwardConvolution(instruction); - } else if (target == kCudnnConvBackwardFilterCallTarget) { - changed |= CanonicalizeBackwardFilterConvolution(instruction); - } else if (target == kCudnnConvBackwardInputCallTarget) { - changed |= CanonicalizeBackwardInputConvolution(instruction); - } else { - LOG(FATAL) << "Unknown custom call target for cudnn conv: " - << instruction->ToString(); - } + for (HloCustomCallInstruction* instruction : convs) { + TF_ASSIGN_OR_RETURN(auto kind, GetCudnnConvKind(instruction)); + changed |= [&] { + switch (kind) { + case CudnnConvKind::kForward: + case CudnnConvKind::kForwardActivation: + return CanonicalizeForwardConvolution(instruction); + case CudnnConvKind::kBackwardInput: + return CanonicalizeBackwardInputConvolution(instruction); + case CudnnConvKind::kBackwardFilter: + return CanonicalizeBackwardFilterConvolution(instruction); + } + }(); } return changed; } -StatusOr PadInsertion::Run(HloModule* module) { 
+StatusOr CudnnConvPaddingLegalization::Run(HloModule* module) { bool changed = false; for (HloComputation* computation : module->MakeNonfusionComputations()) { TF_ASSIGN_OR_RETURN(bool result, RunOnComputation(computation)); diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h similarity index 78% rename from tensorflow/compiler/xla/service/gpu/pad_insertion.h rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h index 25cdf64c4cf01300869044d3e4d7c34c85626a5a..7d1b075517fb285222506e0420984906579e681f 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_insertion.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_INSERTION_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_INSERTION_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PADDING_LEGALIZATION_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PADDING_LEGALIZATION_H_ #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" @@ -24,9 +24,11 @@ namespace gpu { // An HLO pass that canonicalizes convolution instructions for GPU codegen. It // inserts Pad instructions before Convolution instructions with uncanonicalized // padding, so that they can be lowered to cuDNN convolution. 
-class PadInsertion : public HloModulePass { +class CudnnConvPaddingLegalization : public HloModulePass { public: - absl::string_view name() const override { return "pad insertion"; } + absl::string_view name() const override { + return "cudnn-conv-padding-legalization"; + } StatusOr Run(HloModule* module) override; @@ -41,4 +43,4 @@ class PadInsertion : public HloModulePass { } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_PAD_INSERTION_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_PADDING_LEGALIZATION_H_ diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc similarity index 95% rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc index ef292373018295f5100b91c343df274b626c2fa1..5cea66de38c77b7690d9c9485fa0534af30a0ad6 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h" #include #include @@ -188,9 +188,9 @@ std::tuple MatchBackwardFilter( // the amount of high padding the same as the amount of low padding as long // as it is between min_padding_high and max_padding_high. If it is not in // that range, we pick the one that's closest to dim->padding_low() and let - // PadInsertion canonicalize the resultant backward convolution later. - // Picking the closest one minimizes the cost of the kPad instruction to be - // inserted by PadInsertion. 
+ // CudnnConvPaddingLegalization canonicalize the resultant backward + // convolution later. Picking the closest one minimizes the cost of the kPad + // instruction to be inserted by CudnnConvPaddingLegalization. if (dim->padding_low() >= min_padding_high && dim->padding_low() <= max_padding_high) { dim->set_padding_high(dim->padding_low()); @@ -207,7 +207,8 @@ std::tuple MatchBackwardFilter( "negative padding (" << dim->padding_high() << ") on right/bottom of the weight gradients, which is not " - "supported by PadInsertion (b/32744257). Falling back to " + "supported by CudnnConvPaddingLegalization (b/32744257). " + "Falling back to " "unfused convolution for instruction: " << conv->ToString(); return no_match_result; @@ -342,7 +343,8 @@ MatchBackwardInput(HloInstruction* conv) { LOG(ERROR) << "The low padding of the backward convolution would be negative (" << backward_padding_low - << "), which isn't supported by PadInsertion for now (b/32744257)."; + << "), which isn't supported by CudnnConvPaddingLegalization " + "for now (b/32744257)."; return no_match_result; } dim->set_padding_low(backward_padding_low); @@ -371,8 +373,8 @@ MatchBackwardInput(HloInstruction* conv) { dim->set_padding_high(backward_padding_low); } else { // Otherwise, we choose the amount that's closest to backward_padding_low, - // and PadInsertion will later insert kSlice instructions to enforce even - // padding. + // and CudnnConvPaddingLegalization will later insert kSlice + // instructions to enforce even padding. // // For example, consider the backward convolution pattern // @@ -398,9 +400,9 @@ MatchBackwardInput(HloInstruction* conv) { dim->set_padding_high(max_padding_high); } } - // PadInsertion doesn't handle backward input convolution with negative - // padding for now. So fall back to unfused convolution in case of negative - // padding. For example, + // CudnnConvPaddingLegalization doesn't handle backward input + // convolution with negative padding for now. 
So fall back to unfused + // convolution in case of negative padding. For example, // ABCD = Conv(abc, reverse(xy), padding_high=2) // could be fused to // ABCD = BackwardInputConv(abc, xy, padding_low=1, padding_high=-1) @@ -410,8 +412,8 @@ MatchBackwardInput(HloInstruction* conv) { "negative padding (" << dim->padding_high() << ") on right/bottom of the activations, which is not " - "supported by PadInsertion (b/32744257). Falling back to " - "unfused convolution for instruction: " + "supported by CudnnConvPaddingLegalization (b/32744257). " + "Falling back to unfused convolution for instruction: " << conv->ToString(); return no_match_result; } @@ -525,6 +527,9 @@ StatusOr RunOnInstruction(HloInstruction* conv) { TF_RETURN_IF_ERROR( custom_call->set_backend_config(GetDefaultBackendConfig())); + VLOG(1) << "Replacing convolution " << conv->ToString() << " with " + << custom_call->ToString(); + // The CustomCall returns a tuple (conv_result, scratch_memory). Extract out // the conv result and replace `conv` with it. 
TF_RETURN_IF_ERROR(conv->parent()->ReplaceWithNewInstruction( @@ -552,7 +557,7 @@ StatusOr RunOnComputation(HloComputation* computation) { } } // namespace -StatusOr CudnnConvolutionRewriter::Run(HloModule* module) { +StatusOr CudnnConvRewriter::Run(HloModule* module) { bool changed = false; for (HloComputation* computation : module->MakeNonfusionComputations()) { TF_ASSIGN_OR_RETURN(bool result, RunOnComputation(computation)); diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h similarity index 74% rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h index 8d7c6fdab510407428a115579a90e8cf85e9fad2..d8ec72c27bab8912d0dc2aeead114eb010b87b78 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_REWRITER_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_REWRITER_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_REWRITER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_REWRITER_H_ #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" @@ -24,11 +24,9 @@ namespace gpu { // Rewrites plain convolutions, backwards-filter convolutions, and // backwards-input convolutions into CustomCall HLOs that call into cuDNN. 
-class CudnnConvolutionRewriter : public HloModulePass { +class CudnnConvRewriter : public HloModulePass { public: - absl::string_view name() const override { - return "cudnn-convolution-rewriter"; - } + absl::string_view name() const override { return "cudnn-conv-rewriter"; } StatusOr Run(HloModule* module) override; }; @@ -36,4 +34,4 @@ class CudnnConvolutionRewriter : public HloModulePass { } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_REWRITER_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_REWRITER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc similarity index 95% rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter_test.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc index d237f8930b74d460ad3d4602670a5afb19b496a2..543160df8ba477126402c607de2989c04c69725e 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter_test.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" @@ -34,9 +34,9 @@ namespace { namespace op = xla::testing::opcode_matchers; using ::testing::_; -class CudnnConvolutionRewriterTest : public HloVerifiedTestBase { +class CudnnConvRewriterTest : public HloVerifiedTestBase { public: - CudnnConvolutionRewriterTest() + CudnnConvRewriterTest() : HloVerifiedTestBase(/*layout_sensitive=*/true, /*allow_mixed_precision=*/false) { for (int i = 0; i < 2; ++i) { @@ -85,7 +85,7 @@ class CudnnConvolutionRewriterTest : public HloVerifiedTestBase { protected: bool RunPass(HloModule* module) { - return CudnnConvolutionRewriter().Run(module).ValueOrDie(); + return CudnnConvRewriter().Run(module).ValueOrDie(); } // A convolution window with stride 1 and zero padding. The size fields are @@ -95,7 +95,7 @@ class CudnnConvolutionRewriterTest : public HloVerifiedTestBase { ConvolutionDimensionNumbers tf_default_dnums_for_backward_input_; }; -TEST_F(CudnnConvolutionRewriterTest, BackwardFilterConvolve) { +TEST_F(CudnnConvRewriterTest, BackwardFilterConvolve) { HloComputation::Builder builder(TestName()); HloInstruction* activations = builder.AddInstruction(HloInstruction::CreateParameter( @@ -123,7 +123,7 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardFilterConvolve) { op::CustomCall(kCudnnConvBackwardFilterCallTarget), 0)); } -TEST_F(CudnnConvolutionRewriterTest, +TEST_F(CudnnConvRewriterTest, BackwardFilterConvolveEquivalentToForwardConvolution) { HloComputation::Builder builder(TestName()); HloInstruction* activations = @@ -152,8 +152,7 @@ TEST_F(CudnnConvolutionRewriterTest, } // Extracted from block35 training. 
-TEST_F(CudnnConvolutionRewriterTest, - BackwardFilterConvolveWithPaddedActivations) { +TEST_F(CudnnConvRewriterTest, BackwardFilterConvolveWithPaddedActivations) { auto builder = HloComputation::Builder(TestName()); HloInstruction* activations = builder.AddInstruction(HloInstruction::CreateParameter( @@ -183,8 +182,7 @@ TEST_F(CudnnConvolutionRewriterTest, } // Extracted from inception v3 training. -TEST_F(CudnnConvolutionRewriterTest, - BackwardFilterConvolveWithPaddedGradients) { +TEST_F(CudnnConvRewriterTest, BackwardFilterConvolveWithPaddedGradients) { auto builder = HloComputation::Builder(TestName()); HloInstruction* activations = builder.AddInstruction(HloInstruction::CreateParameter( @@ -213,7 +211,7 @@ TEST_F(CudnnConvolutionRewriterTest, op::CustomCall(kCudnnConvBackwardFilterCallTarget), 0)); } -TEST_F(CudnnConvolutionRewriterTest, BackwardFilterConvolveWithUnevenPadding) { +TEST_F(CudnnConvRewriterTest, BackwardFilterConvolveWithUnevenPadding) { auto builder = HloComputation::Builder(TestName()); HloInstruction* activations = builder.AddInstruction(HloInstruction::CreateParameter( @@ -242,7 +240,7 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardFilterConvolveWithUnevenPadding) { op::CustomCall(kCudnnConvBackwardFilterCallTarget), 0)); } -TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveEvenPadding) { +TEST_F(CudnnConvRewriterTest, BackwardInputConvolveEvenPadding) { auto builder = HloComputation::Builder(TestName()); HloInstruction* output = builder.AddInstruction(HloInstruction::CreateParameter( @@ -307,7 +305,7 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveEvenPadding) { // Convolve([abc], [x], base_dilation=2) // = Convolve([abc], Reverse([x]), base_dilation=2) // = BackwardInputConvolve([abc], [x], stride=2) -TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolve1x1Filter) { +TEST_F(CudnnConvRewriterTest, BackwardInputConvolve1x1Filter) { auto builder = HloComputation::Builder(TestName()); // NHWC dimension order. 
HloInstruction* output = @@ -341,7 +339,7 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolve1x1Filter) { // BackwardInputConvolve([abc], [x], stride=1) is equivalent to // ForwardConvolve([abc], [x], stride=1). No need to fold it into backward input // convolution. -TEST_F(CudnnConvolutionRewriterTest, +TEST_F(CudnnConvRewriterTest, BackwardInputConvolve1x1FilterEquivalentToForwardConvolve) { auto builder = HloComputation::Builder(TestName()); // NHWC dimension order. @@ -385,8 +383,7 @@ TEST_F(CudnnConvolutionRewriterTest, // 20x10x10x192 // // Gradients are padded unevenly. -TEST_F(CudnnConvolutionRewriterTest, - BackwardInputConvolveUnevenPaddingOnGradients) { +TEST_F(CudnnConvRewriterTest, BackwardInputConvolveUnevenPaddingOnGradients) { auto builder = HloComputation::Builder(TestName()); HloInstruction* output = builder.AddInstruction(HloInstruction::CreateParameter( @@ -436,7 +433,7 @@ TEST_F(CudnnConvolutionRewriterTest, // Similar to BackwardInputConvolveUnevenPadding, but the low padding of the // gradients exceeds kernel_size - 1. Therefore, this pattern cannot be fused. -TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveLowPaddingTooLarge) { +TEST_F(CudnnConvRewriterTest, BackwardInputConvolveLowPaddingTooLarge) { auto builder = HloComputation::Builder(TestName()); HloInstruction* output = builder.AddInstruction(HloInstruction::CreateParameter( @@ -488,9 +485,8 @@ TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveLowPaddingTooLarge) { // padding_low=2, padding_high=1, base_dilation=2) // // We should fuse BC even though padding on activations is uneven, because -// PadInsertion will canonicalize the fusion HLO. -TEST_F(CudnnConvolutionRewriterTest, - BackwardInputConvolveUnevenPaddingOnActivations) { +// CudnnConvPaddingLegalization will canonicalize the fusion HLO. 
+TEST_F(CudnnConvRewriterTest, BackwardInputConvolveUnevenPaddingOnActivations) { auto builder = HloComputation::Builder(TestName()); // The gradients are in NCHW layout. HloInstruction* output = @@ -543,9 +539,10 @@ TEST_F(CudnnConvolutionRewriterTest, // BC = BackwardInput(FC) does: // [4] = conv([3], reverse([2]), padding_high=2) // -// We currently don't fuse BC because PadInsertion doesn't support negative -// padding on the gradients of backward convolution (b/32744257). -TEST_F(CudnnConvolutionRewriterTest, +// We currently don't fuse BC because CudnnConvPaddingLegalization +// doesn't support negative padding on the gradients of backward convolution +// (b/32744257). +TEST_F(CudnnConvRewriterTest, BackwardInputConvolveNegativePaddingHighOnActivations) { auto builder = HloComputation::Builder(TestName()); // The gradients are in NCHW layout. @@ -586,7 +583,7 @@ TEST_F(CudnnConvolutionRewriterTest, // Check that we will materialize a reversed version of a constant in order to // pattern-match a backwards input convolution. -TEST_F(CudnnConvolutionRewriterTest, BackwardInputConvolveConstantFilter) { +TEST_F(CudnnConvRewriterTest, BackwardInputConvolveConstantFilter) { Array4D constant_arr(4, 4, 2, 2); constant_arr.FillIota(0); string constant_str = diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc b/tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.cc similarity index 78% rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.cc index 89dd1bb272663ac1f6eecbaae070d201d38e44c8..0b4fdf71623e1597168c6873a0d2b60176e518ce 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h" #include "absl/strings/str_cat.h" #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h" @@ -110,10 +110,10 @@ class ScratchBufAllocator : public se::ScratchAllocator { }; template -Status RunCudnnConvolutionImpl(CudnnConvParams params, - se::ScratchAllocator* scratch_allocator, - se::Stream* stream, - se::dnn::ProfileResult* profile_result) { +Status RunCudnnConvImpl(CudnnConvParams params, + se::ScratchAllocator* scratch_allocator, + se::Stream* stream, + se::dnn::ProfileResult* profile_result) { CudnnConvKind kind = params.kind; const Shape& input_shape = *params.input_shape; const Shape& filter_shape = *params.filter_shape; @@ -312,11 +312,12 @@ StatusOr GetCudnnConvParams( TF_ASSIGN_OR_RETURN(CudnnConvBackendConfig backend_config, conv->backend_config()); - const auto& target = conv->custom_call_target(); + TF_ASSIGN_OR_RETURN(CudnnConvKind kind, GetCudnnConvKind(conv)); const auto& lhs_shape = conv->operand(0)->shape(); const auto& rhs_shape = conv->operand(1)->shape(); const auto& conv_result_shape = conv->shape().tuple_shapes(0); + params.kind = kind; params.window = &conv->window(); params.dnums = &conv->convolution_dimension_numbers(); params.feature_group_count = conv->feature_group_count(); @@ -324,77 +325,76 @@ StatusOr GetCudnnConvParams( backend_config.algorithm(), backend_config.tensor_ops_enabled())); params.conv_result_scale = backend_config.conv_result_scale(); - if (target == kCudnnConvForwardCallTarget) { - params.kind = CudnnConvKind::kForward; - params.input_shape = &lhs_shape; - params.filter_shape = &rhs_shape; - params.output_shape = &conv_result_shape; - params.input_buf = operand_buffers[0]; - params.filter_buf = operand_buffers[1]; - params.output_buf = 
result_buffer; - } else if (target == kCudnnConvBackwardInputCallTarget) { - params.kind = CudnnConvKind::kBackwardInput; - params.input_shape = &conv_result_shape; - params.filter_shape = &rhs_shape; - params.output_shape = &lhs_shape; - params.input_buf = result_buffer; - params.filter_buf = operand_buffers[1]; - params.output_buf = operand_buffers[0]; - } else if (target == kCudnnConvBackwardFilterCallTarget) { - params.kind = CudnnConvKind::kBackwardFilter; - params.input_shape = &lhs_shape; - params.filter_shape = &conv_result_shape; - params.output_shape = &rhs_shape; - params.input_buf = operand_buffers[0]; - params.filter_buf = result_buffer; - params.output_buf = operand_buffers[1]; - } else if (target == kCudnnConvBiasActivationForwardCallTarget) { - params.kind = CudnnConvKind::kForwardActivation; - params.input_shape = &lhs_shape; - params.filter_shape = &rhs_shape; - params.output_shape = &conv_result_shape; - params.fusion.emplace(); - auto& fusion = *params.fusion; - if (backend_config.activation_mode() < - static_cast(se::dnn::ActivationMode::kNumActivationModes)) { - fusion.mode = static_cast( - backend_config.activation_mode()); - } else { - return InternalError("Bad activation mode: %s", - backend_config.ShortDebugString()); - } - fusion.side_input_scale = backend_config.side_input_scale(); - params.input_buf = operand_buffers[0]; - params.filter_buf = operand_buffers[1]; - params.output_buf = result_buffer; - params.fusion->bias_buf = operand_buffers[2]; - if (operand_buffers.size() >= 4) { - params.fusion->side_input_buf = operand_buffers[3]; + switch (kind) { + case CudnnConvKind::kForward: + params.input_shape = &lhs_shape; + params.filter_shape = &rhs_shape; + params.output_shape = &conv_result_shape; + params.input_buf = operand_buffers[0]; + params.filter_buf = operand_buffers[1]; + params.output_buf = result_buffer; + break; + case CudnnConvKind::kBackwardInput: + params.input_shape = &conv_result_shape; + params.filter_shape = 
&rhs_shape; + params.output_shape = &lhs_shape; + params.input_buf = result_buffer; + params.filter_buf = operand_buffers[1]; + params.output_buf = operand_buffers[0]; + break; + case CudnnConvKind::kBackwardFilter: + params.input_shape = &lhs_shape; + params.filter_shape = &conv_result_shape; + params.output_shape = &rhs_shape; + params.input_buf = operand_buffers[0]; + params.filter_buf = result_buffer; + params.output_buf = operand_buffers[1]; + break; + case CudnnConvKind::kForwardActivation: { + params.kind = CudnnConvKind::kForwardActivation; + params.input_shape = &lhs_shape; + params.filter_shape = &rhs_shape; + params.output_shape = &conv_result_shape; + params.fusion.emplace(); + auto& fusion = *params.fusion; + if (backend_config.activation_mode() < + static_cast(se::dnn::ActivationMode::kNumActivationModes)) { + fusion.mode = static_cast( + backend_config.activation_mode()); + } else { + return InternalError("Bad activation mode: %s", + backend_config.ShortDebugString()); + } + fusion.side_input_scale = backend_config.side_input_scale(); + params.input_buf = operand_buffers[0]; + params.filter_buf = operand_buffers[1]; + params.output_buf = result_buffer; + params.fusion->bias_buf = operand_buffers[2]; + if (operand_buffers.size() >= 4) { + params.fusion->side_input_buf = operand_buffers[3]; + } } - } else { - return InternalError("Unexpected custom call target: %s", target); } return params; } } // anonymous namespace -Status RunCudnnConvolution(const HloCustomCallInstruction* conv, - absl::Span operand_buffers, - se::DeviceMemoryBase result_buffer, - se::DeviceMemoryBase scratch_buf, se::Stream* stream, - se::dnn::ProfileResult* profile_result) { +Status RunCudnnConv(const HloCustomCallInstruction* conv, + absl::Span operand_buffers, + se::DeviceMemoryBase result_buffer, + se::DeviceMemoryBase scratch_buf, se::Stream* stream, + se::dnn::ProfileResult* profile_result) { ScratchBufAllocator scratch_allocator(scratch_buf); - return 
RunCudnnConvolution(conv, operand_buffers, result_buffer, - &scratch_allocator, stream, profile_result); + return RunCudnnConv(conv, operand_buffers, result_buffer, &scratch_allocator, + stream, profile_result); } -Status RunCudnnConvolution(const HloCustomCallInstruction* conv, - absl::Span operand_buffers, - se::DeviceMemoryBase result_buffer, - se::ScratchAllocator* scratch_allocator, - se::Stream* stream, - se::dnn::ProfileResult* profile_result) { +Status RunCudnnConv(const HloCustomCallInstruction* conv, + absl::Span operand_buffers, + se::DeviceMemoryBase result_buffer, + se::ScratchAllocator* scratch_allocator, se::Stream* stream, + se::dnn::ProfileResult* profile_result) { TF_ASSIGN_OR_RETURN(CudnnConvParams params, GetCudnnConvParams(conv, operand_buffers, result_buffer)); @@ -402,14 +402,14 @@ Status RunCudnnConvolution(const HloCustomCallInstruction* conv, conv->shape().tuple_shapes(0).element_type(); switch (output_primitive_type) { case F16: - return RunCudnnConvolutionImpl(params, scratch_allocator, - stream, profile_result); + return RunCudnnConvImpl(params, scratch_allocator, stream, + profile_result); case F32: - return RunCudnnConvolutionImpl(params, scratch_allocator, stream, - profile_result); + return RunCudnnConvImpl(params, scratch_allocator, stream, + profile_result); case F64: - return RunCudnnConvolutionImpl(params, scratch_allocator, stream, - profile_result); + return RunCudnnConvImpl(params, scratch_allocator, stream, + profile_result); default: LOG(FATAL) << ShapeUtil::HumanString(*params.output_shape); } diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h b/tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h similarity index 67% rename from tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h rename to tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h index 61aec1ceccec0f253f9ddaa688d64cacea800cf3..edbc75a94a1238540390b93f0fa5217852c7781f 100644 --- 
a/tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_RUNNER_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_RUNNER_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_ #include "absl/types/optional.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" @@ -42,20 +42,19 @@ namespace gpu { // allocator and take note of how much memory is used. The next time you call // the same conv, you can provide an explicitly preallocated scratch buffer of // that size, if you like. -Status RunCudnnConvolution(const HloCustomCallInstruction* conv, - absl::Span operand_buffers, - se::DeviceMemoryBase result_buffer, - se::DeviceMemoryBase scratch_buf, se::Stream* stream, - se::dnn::ProfileResult* profile_result = nullptr); - -Status RunCudnnConvolution(const HloCustomCallInstruction* conv, - absl::Span operand_buffers, - se::DeviceMemoryBase result_buffer, - se::ScratchAllocator* scratch_allocator, - se::Stream* stream, - se::dnn::ProfileResult* profile_result = nullptr); +Status RunCudnnConv(const HloCustomCallInstruction* conv, + absl::Span operand_buffers, + se::DeviceMemoryBase result_buffer, + se::DeviceMemoryBase scratch_buf, se::Stream* stream, + se::dnn::ProfileResult* profile_result = nullptr); + +Status RunCudnnConv(const HloCustomCallInstruction* conv, + absl::Span operand_buffers, + se::DeviceMemoryBase result_buffer, + se::ScratchAllocator* scratch_allocator, se::Stream* stream, + se::dnn::ProfileResult* profile_result = nullptr); } // namespace gpu } // namespace xla -#endif // 
TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONVOLUTION_RUNNER_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_CONV_RUNNER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.cc similarity index 97% rename from tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc rename to tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.cc index 3761c19cfcab10e0c6faa17c2d1d535d706ff6c5..8ac11bcf657db4eab76c611b8975e12e190994c5 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.cc +++ b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h" #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/gpu/backend_configs.pb.h" @@ -234,14 +234,15 @@ StatusOr> TryRewriteToCudnnForwardRelu( config.set_side_input_scale(alpha_side_input); TF_RETURN_IF_ERROR(new_conv->set_backend_config(config)); - VLOG(1) << "Rewriting " << conv->name() << " to " << new_conv->name(); + VLOG(1) << "Replacing convolution " << conv->ToString() << " with " + << new_conv->ToString(); return HloInstruction::CreateGetTupleElement(conv->shape().tuple_shapes(0), new_conv, 0); } } // namespace -StatusOr CudnnFusedConvolutionRewriter::Run(HloModule* module) { +StatusOr CudnnFusedConvRewriter::Run(HloModule* module) { bool changed = false; for (HloComputation* computation : module->MakeNonfusionComputations()) { std::vector matches; diff --git a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h 
b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h similarity index 77% rename from tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h rename to tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h index bd12aadded9dd9e19bc695ddc11e5529931a306a..613ed8dbdc33dfc3684deb5fd3ee8f5b9ea5fc50 100644 --- a/tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h +++ b/tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONVOLUTION_REWRITER_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONVOLUTION_REWRITER_H_ +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONV_REWRITER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONV_REWRITER_H_ #include "tensorflow/compiler/xla/service/hlo_instructions.h" #include "tensorflow/compiler/xla/service/hlo_pass_interface.h" @@ -22,7 +22,7 @@ limitations under the License. 
namespace xla { namespace gpu { -class CudnnFusedConvolutionRewriter : public HloModulePass { +class CudnnFusedConvRewriter : public HloModulePass { public: absl::string_view name() const override { return "cudnn-fused-convolution-rewriter"; @@ -34,4 +34,4 @@ class CudnnFusedConvolutionRewriter : public HloModulePass { } // namespace gpu } // namespace xla -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONVOLUTION_REWRITER_H_ +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_CUDNN_FUSED_CONV_REWRITER_H_ diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc index c1aaa4bf04ddc31edf723c056805ae5aad994e55..6dcdaf1cfe06e446deed847aaf29088a7ed10e13 100644 --- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc @@ -358,13 +358,6 @@ llvm_ir::ElementGenerator GpuElementalIrEmitter::MakeElementGenerator( const HloInstruction* operand = hlo->operand(0); const Window& window = hlo->window(); - // TODO(b/31410564): Implement dilation for reduce-window. - if (window_util::HasDilation(window)) { - return Unimplemented( - "Dilation for reduce-window not implemented on GPU. " - "See b/31410564."); - } - PrimitiveType operand_element_type = operand->shape().element_type(); llvm::Value* accum_ptr = llvm_ir::EmitAllocaAtFunctionEntry( llvm_ir::PrimitiveTypeToIrType(operand_element_type, module_), @@ -397,9 +390,24 @@ llvm_ir::ElementGenerator GpuElementalIrEmitter::MakeElementGenerator( for (size_t i = 0; i < index.size(); ++i) { llvm::Value* stridden_index = NSWMul( index[i], index_typed_const(window.dimensions(i).stride())); + input_index[i] = NSWSub( + NSWAdd(stridden_index, + NSWMul(window_index[i], + index_typed_const( + window.dimensions(i).window_dilation()))), + index_typed_const(window.dimensions(i).padding_low())); + + // We need to verify that we are not in the dilated base area. 
+ llvm::Value* dilation_condition = ICmpEQ( + SRem(input_index[i], + index_typed_const(window.dimensions(i).base_dilation())), + index_typed_const(0)); + in_bounds = And(in_bounds, dilation_condition); + + // Apply base dilation to the index. input_index[i] = - NSWSub(NSWAdd(stridden_index, window_index[i]), - index_typed_const(window.dimensions(i).padding_low())); + SDiv(input_index[i], + index_typed_const(window.dimensions(i).base_dilation())); // We must check whether 0 ≤ input_index[i] < bound, as otherwise // we are in the pad and so can skip the computation. This diff --git a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc index 27a4d0b601f3807fe6b94dd6171a44f292921ede..7d01eeb02567d710e9de089c7f29ffcc5f959f9a 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_hlo_support_checker_test.cc @@ -57,10 +57,13 @@ TEST_F(GpuHloSupportCheckerTest, SparseUnimplemented) { HloInstruction::CreateParameter(1, sparse_shape, "param1")); builder.AddInstruction(HloInstruction::CreateBinary( sparse_shape, HloOpcode::kAdd, param0, param1)); - auto module = CreateNewModule(); + // Since verifier is reporting sparse layouts as errors, we should + // use a regular HloModule instead of VerifiedHloModule to avoid + // verifier errors being triggered in the destructor. 
+ auto module = HloTestBase::CreateNewModule(); module->AddEntryComputation(builder.Build()); - Status status = checker().Run(module).status(); + Status status = checker().Run(module.get()).status(); ASSERT_EQ(status.code(), tensorflow::error::UNIMPLEMENTED); EXPECT_THAT(status.error_message(), HasSubstr("GPU backend does not support")); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc index 74352f26aa9c3a2ca597da21735438df92f863ab..1c0a23fa3eb38961d420aff05e412c3b4d8524e7 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc @@ -18,7 +18,6 @@ limitations under the License. #include #include "tensorflow/compiler/xla/layout_util.h" -#include "tensorflow/compiler/xla/service/gpu/gpu_options.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" #include "tensorflow/compiler/xla/service/hlo_casting_utils.h" @@ -125,14 +124,8 @@ Status GpuLayoutAssignment::AddBackendConstraintsToDnnConvCustomCall( DataLayout input; FilterLayout filter; DataLayout output; - if (ConvUseLayoutHeuristic(instr->GetModule()->config())) { - std::tie(input, filter, output) = - HeuristicLayoutAssignment(instr, stream_executor_); - } else { - input = DataLayout::kBatchDepthYX; - filter = FilterLayout::kOutputInputYX; - output = DataLayout::kBatchDepthYX; - } + std::tie(input, filter, output) = + HeuristicLayoutAssignment(instr, stream_executor_); TF_ASSIGN_OR_RETURN( std::tie(*input_shape->mutable_layout(), @@ -215,21 +208,37 @@ Status GpuLayoutAssignment::AddBackendConstraints( constraints->SetOperandLayout(op1_shape, instruction, 1)); TF_RETURN_IF_ERROR( constraints->SetInstructionLayout(output_shape, instruction)); + } else if (instruction->opcode() == HloOpcode::kSort && + ShapeUtil::Rank(instruction->operand(0)->shape()) > 1) { + // 
Make sure that all the operands and the output(s) have the same layout. + Shape keys_shape = instruction->operand(0)->shape(); + Layout keys_layout = + LayoutUtil::GetDefaultLayoutForRank(ShapeUtil::Rank(keys_shape)); + for (int64 i = 0; i < instruction->operand_count(); ++i) { + Shape shape = instruction->operand(i)->shape(); + *shape.mutable_layout() = keys_layout; + TF_RETURN_IF_ERROR( + constraints->SetOperandLayout(shape, instruction, i)); + const LogicalBuffer* output_buffer; + if (ShapeUtil::IsArray(instruction->shape())) { + TF_ASSIGN_OR_RETURN( + output_buffer, + constraints->points_to_analysis().GetBufferDefinedAt(instruction, + {})); + } else { + TF_ASSIGN_OR_RETURN( + output_buffer, + constraints->points_to_analysis().GetBufferDefinedAt(instruction, + {i})); + } + TF_RETURN_IF_ERROR( + constraints->SetBufferLayout(keys_layout, *output_buffer)); + } } } return Status::OK(); } -bool GpuLayoutAssignment::CustomCallRequiresMajorFirstLayout( - const HloInstruction* instruction) { - // - Inputs to cudnn batchnorm custom calls don't need the major-first layout - // (i.e. {n, n-1, ...0}) -- we can handle any layout. - // - Inputs to cudnn convolution require custom layouts handled in - // AddBackendConstraints. 
- return !IsCustomCallToDnnBatchNorm(*instruction) && - !IsCustomCallToDnnConvolution(*instruction); -} - Status GpuLayoutAssignment::PropagateOperandConstraint( const OperandLayoutConstraint& layout_constraint, LayoutConstraints* constraints) { diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h index 4ba7989e9cba9abe6cdc1fcabd5f011bd9cfb0ec..6a48e55fd2e784f80a50f4565107db177fb43bfc 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.h @@ -46,8 +46,6 @@ class GpuLayoutAssignment : public LayoutAssignment { Status PropagateBufferConstraint( const BufferLayoutConstraint& buffer_constraint, LayoutConstraints* constraints) override; - bool CustomCallRequiresMajorFirstLayout( - const HloInstruction* instruction) override; private: Status AddBackendConstraintsToDnnConvCustomCall( diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc index 04681cfcec792d86eed95585262691932b07b269..4822b820f4e229336e2b26cfbd0097c8c31a50c8 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment_test.cc @@ -351,7 +351,8 @@ TEST_F(LayoutAssignmentTest, DotLayout) { ParseHloString(hlo_text)); ComputationLayout computation_layout( - module->entry_computation()->ComputeProgramShape()); + module->entry_computation()->ComputeProgramShape(), + /*ignore_layouts=*/false); GpuLayoutAssignment layout_assignment( &computation_layout, LayoutAssignment::InstructionCanChangeLayout, backend().default_stream_executor()); @@ -364,6 +365,34 @@ TEST_F(LayoutAssignmentTest, DotLayout) { op::ShapeWithLayout(expected_shape))); } +TEST_F(LayoutAssignmentTest, SortLayout) { + const char* hlo_text = R"( + HloModule SortLayout + ENTRY sort { + keys = f32[3,2]{0,1} 
constant(f32[3,2]{0,1}{{0,1},{0,1},{0,1}}) + values = f32[2,3]{1,0} parameter(0) + transpose = f32[3,2]{1,0} transpose(values), dimensions={1,0} + ROOT sort = (f32[3,2]{1,0}, f32[3,2]{1,0}) sort(keys, transpose), + dimensions={1} + })"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseHloString(hlo_text)); + + ComputationLayout computation_layout( + module->entry_computation()->ComputeProgramShape(), + /*ignore_layouts=*/false); + GpuLayoutAssignment layout_assignment( + &computation_layout, LayoutAssignment::InstructionCanChangeLayout, + backend().default_stream_executor()); + EXPECT_TRUE(layout_assignment.Run(module.get()).ValueOrDie()); + + Shape expected_shape = ShapeUtil::MakeShapeWithLayout(F32, {3, 2}, {1, 0}); + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Sort(op::ShapeWithLayout(expected_shape), + op::ShapeWithLayout(expected_shape))); +} + } // namespace } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc index b61f0387392d2301109a484ca5c1f65f18882265..1d66787d8927ad818cbc66d19429c1816fc51748 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc @@ -47,6 +47,7 @@ bool IsFusible(const HloInstruction& hlo) { hlo.opcode() == HloOpcode::kReduce || hlo.opcode() == HloOpcode::kReduceWindow || hlo.opcode() == HloOpcode::kReshape || + hlo.opcode() == HloOpcode::kScatter || hlo.opcode() == HloOpcode::kSlice || hlo.opcode() == HloOpcode::kTranspose; } @@ -223,6 +224,11 @@ bool GpuInstructionFusion::ShouldFuse(HloInstruction* consumer, return false; } + // Scatter is only supported at the root of a kInput fusion. + if (producer->opcode() == HloOpcode::kScatter) { + return false; + } + // Do not fuse into reduce input fusions if the resulting kernel would suffer // from poor data locality (due to unfriendly input layouts). 
if (IsInputFusibleReduction(*consumer) && @@ -285,7 +291,8 @@ bool GpuInstructionFusion::ShouldFuseIntoMultiOutput(HloInstruction* consumer, HloInstruction::FusionKind GpuInstructionFusion::ChooseKind( const HloInstruction* producer, const HloInstruction* consumer) { - if (IsReductionToVector(*consumer)) { + if (IsReductionToVector(*consumer) || + consumer->opcode() == HloOpcode::kScatter) { return HloInstruction::FusionKind::kInput; } if (producer->opcode() == HloOpcode::kDot || diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc index 96bfe0c12eb9cd6ef25804d6b34767471616f7e4..fd9b7cee80bdad9a8ed625872ae68bede10200b3 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc @@ -709,5 +709,44 @@ TEST_F(InstructionFusionTest, AvoidsLargeFusion) { } } +TEST_F(InstructionFusionTest, FuseIntoScatter) { + auto module = ParseHloString(R"( + HloModule test_module + + add { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT add = f32[] add(lhs, rhs) + } + + ENTRY FuseIntoScatter { + p0 = s32[3,3] parameter(0) + operand = s32[3,3] add(p0, p0) + p1 = s32[2] parameter(1) + indices = s32[2] add(p1, p1) + p2 = s32[2,3] parameter(2) + updates = s32[2,3] add(p2, p2) + scatter = s32[3,3] scatter(operand, indices, updates), + to_apply=add, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 + ROOT add = s32[3,3] add(scatter, scatter) + })") + .ValueOrDie(); + + EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); + + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, op::Add(op::Fusion(), op::Fusion())); + EXPECT_EQ(root->operand(0)->fusion_kind(), + HloInstruction::FusionKind::kInput); + EXPECT_THAT(root->operand(0)->fused_expression_root(), + 
op::Scatter(op::Add(), op::Add(), op::Add())); +} + } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h index a64a616ab1329422d0197f4a7f99ec557a95f8ed..f373d4a8393a047aba599b0fae954e98a740161e 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.h @@ -108,9 +108,9 @@ bool IsCustomCallToDnnBatchNorm(const HloInstruction& hlo); // memory used by cudnn. Callers shouldn't inspect scratch_memory, as its value // is not well-defined. // -// CudnnConvolutionRewriter lowers kConvolution HLOs to these custom calls. +// CudnnConvRewriter lowers kConvolution HLOs to these custom calls. // When it does so, it chooses algorithm -1 and 0 bytes of scratch space. Later -// on in the pipeline, CudnnConvolutionAlgorithmChooser chooses an explicit +// on in the pipeline, CudnnConvAlgorithmChooser chooses an explicit // algorithm for each conv and sets the amount of scratch space needed. // // (Representing the scratch memory as an output may seem strange at first, but diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index b7c37bcf3ca910f10d18339dfe7f1d29f2a55c9e..a3821e077ecf6b1dce1e2c8785fe3a59516db2be 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -179,6 +179,21 @@ bool IrEmitter::MaybeEmitDirectAtomicOperation( bool is_atomic_integral = element_type == S32 || element_type == U32 || element_type == S64 || element_type == U64; llvm::Value* source = Load(source_address, "source"); + + // kCopy of RHS -> atomic store. 
+ if (root_opcode == HloOpcode::kCopy && + (element_type == F32 || is_atomic_integral) && + computation.root_instruction()->operand(0)->opcode() == + HloOpcode::kParameter && + computation.root_instruction()->operand(0)->parameter_number() == 1) { + llvm::StoreInst* store = Store(source, output_address); + store->setAtomic(llvm::AtomicOrdering::Unordered); + // Derive a minimum alignment from the type. The optimizer can increase it + // later. + store->setAlignment(ShapeUtil::ByteSizeOfPrimitiveType(element_type)); + return true; + } + if (root_opcode == HloOpcode::kAdd) { // NVPTX supports atomicAdd on F32 and integer types. if (element_type == F32) { @@ -480,18 +495,10 @@ Status IrEmitter::HandleDot(HloInstruction* dot) { TF_RET_CHECK(!ShapeUtil::IsScalar(lhs_shape) && !ShapeUtil::IsScalar(rhs_shape)); - // Reduce along the last dimension of the LHS and the second-to-last dimension - // of the RHS. Vectors are a special case where the reduction dimension is 0 - // for both LHS and RHS. This results in a vector dot product producing a - // scalar. - const int64 lhs_reduction_dimension = - ShapeUtil::GetDimensionNumber(lhs_shape, -1); - const int64 rhs_reduction_dimension = - ShapeUtil::Rank(rhs_shape) >= 2 + dnums.lhs_batch_dimensions_size() - ? ShapeUtil::GetDimensionNumber(rhs_shape, -2) - : dnums.lhs_batch_dimensions_size(); - - // Check that the batch dims don't cover the last two dims. + const int64 lhs_reduction_dimension = dnums.lhs_contracting_dimensions(0); + const int64 rhs_reduction_dimension = dnums.rhs_contracting_dimensions(0); + + // Check that the batch dims don't cover the reduction dimensions. for (int64 batch_dim : dnums.lhs_batch_dimensions()) { CHECK_NE(lhs_reduction_dimension, batch_dim); CHECK_NE(rhs_reduction_dimension, batch_dim); @@ -499,7 +506,11 @@ Status IrEmitter::HandleDot(HloInstruction* dot) { // Verify the reduction dimension in the two operands are the same size. 
TF_RET_CHECK(lhs_shape.dimensions(lhs_reduction_dimension) == - rhs_shape.dimensions(rhs_reduction_dimension)); + rhs_shape.dimensions(rhs_reduction_dimension)) + << "lhs_shape.dimensions(" << lhs_reduction_dimension + << ") = " << lhs_shape.dimensions(lhs_reduction_dimension) + << ", and rhs_shape.dimensions(" << rhs_reduction_dimension + << ") = " << rhs_shape.dimensions(rhs_reduction_dimension); // Create loop nests which loop through the LHS operand dimensions and the RHS // operand dimensions. The reduction dimension of the LHS and RHS are handled diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index c792dd2ddb0faeba076548ba104aa291e0814140..008398328c7e5acea10ea313c6065edb36d27ee8 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -34,6 +34,7 @@ limitations under the License. #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor.h" @@ -43,7 +44,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/gpu/convolution_thunk.h" #include "tensorflow/compiler/xla/service/gpu/copy_thunk.h" #include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_thunk.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_runner.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_runner.h" #include "tensorflow/compiler/xla/service/gpu/fft_thunk.h" #include "tensorflow/compiler/xla/service/gpu/for_thunk.h" #include "tensorflow/compiler/xla/service/gpu/gemm_thunk.h" @@ -493,13 +494,68 @@ Status IrEmitterUnnested::HandleFft(HloInstruction* fft) { Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { HloInstruction* root = fusion->fused_expression_root(); - // HandleFusion specializes reduction from a multi-dimensional array to a 1D - // array. The specialized version requires a initializer thunk that - // initializes the output array to the initial value of the reduce. if (HloInstruction::FusionKind::kInput == fusion->fusion_kind()) { switch (root->opcode()) { + case HloOpcode::kScatter: { + std::vector> thunks; + // The initialization from 'operand' is using different loop bounds, so + // emit it in a separate kernel. Treat it like a loop fusion, writing to + // the output buffer. 
+ { + int unroll_factor = ComputeMaxUnrollFactor(fusion); + thunks.push_back(BuildKernelThunk( + fusion, /*implements_whole_instruction=*/false, unroll_factor)); + + std::vector operand_parameter_arrays; + for (HloInstruction* operand : fusion->operands()) { + operand_parameter_arrays.push_back(GetIrArray(*operand, *fusion)); + } + GpuElementalIrEmitter operand_elemental_emitter( + hlo_module_config_, ir_emitter_context_->llvm_module(), &b_, + GetNestedComputer()); + FusedIrEmitter operand_fused_emitter(operand_parameter_arrays, + &operand_elemental_emitter); + TF_RETURN_IF_ERROR( + root->mutable_operand(0)->Accept(&operand_fused_emitter)); + + TF_RETURN_IF_ERROR(EmitTargetElementLoopInThunk( + *fusion, operand_fused_emitter.GetGenerator(root->operand(0)), + static_cast(thunks.back().get()))); + } + + // Now build the actual scatter, reading and writing to the freshly + // filled output buffer. + { + thunks.push_back( + BuildKernelThunk(fusion, + /*implements_whole_instruction=*/false)); + // Spin up a new fused emitter for the scatter kernel and emit it. 
+ std::vector scatter_parameter_arrays; + for (HloInstruction* operand : fusion->operands()) { + scatter_parameter_arrays.push_back(GetIrArray(*operand, *fusion)); + } + GpuElementalIrEmitter scatter_elemental_emitter( + hlo_module_config_, ir_emitter_context_->llvm_module(), &b_, + GetNestedComputer()); + FusedIrEmitter scatter_fused_emitter(scatter_parameter_arrays, + &scatter_elemental_emitter); + TF_RETURN_IF_ERROR(root->Accept(&scatter_fused_emitter)); + TF_RETURN_IF_ERROR(EmitScatter( + thunks.back().get(), root, + /*scatter_indices_gen=*/ + scatter_fused_emitter.GetGenerator(root->operand(1)), + /*updates_gen=*/ + scatter_fused_emitter.GetGenerator(root->operand(2)))); + } + thunk_sequence_->emplace_back( + absl::make_unique(std::move(thunks), fusion)); + return Status::OK(); + } case HloOpcode::kTuple: case HloOpcode::kReduce: { + // HandleFusion specializes reduction from a multi-dimensional array to + // a 1D array. The specialized version requires a initializer thunk that + // initializes the output array to the initial value of the reduce. if (root->opcode() == HloOpcode::kReduce && ShapeUtil::IsTuple(root->shape())) { // TODO(b/112040122): Support variadic reduce. @@ -1958,6 +2014,178 @@ Status IrEmitterUnnested::HandleRng(HloInstruction* rng) { return Status::OK(); } +Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) { + const HloInstruction* operand = scatter->operand(0); + const HloInstruction* scatter_indices = scatter->operand(1); + const HloInstruction* updates = scatter->operand(2); + + std::vector> thunks; + + // Copy the operand into the output if it's not the same buffer already. 
+ auto operand_buffer = GetAllocationSlice(*operand); + auto destination_buffer = GetAllocationSlice(*scatter); + if (operand_buffer != destination_buffer) { + thunks.push_back(absl::make_unique( + /*source_address=*/operand_buffer, + /*destination_buffer=*/destination_buffer, + /*mem_size=*/ShapeUtil::ByteSizeOf(operand->shape()), scatter)); + } + + thunks.push_back( + BuildKernelThunk(scatter, + /*implements_whole_instruction=*/thunks.empty())); + + TF_RETURN_IF_ERROR( + EmitScatter(thunks.back().get(), scatter, + /*scatter_indices_gen=*/ + [=](const IrArray::Index& index) { + return GetIrArray(*scatter_indices, *scatter) + .EmitReadArrayElement(index, &b_, "scatter_index"); + }, + /*updates_gen=*/ + [=](const IrArray::Index& index) { + return GetIrArray(*updates, *scatter) + .EmitReadArrayElement(index, &b_, "update"); + })); + + // Elide the sequential thunk if there's no copy. + if (thunks.size() == 1) { + thunk_sequence_->push_back(std::move(thunks[0])); + } else { + thunk_sequence_->emplace_back( + absl::make_unique(std::move(thunks), scatter)); + } + return Status::OK(); +} + +Status IrEmitterUnnested::EmitScatter( + Thunk* thunk, HloInstruction* scatter, + const llvm_ir::ElementGenerator& scatter_indices_gen, + const llvm_ir::ElementGenerator& updates_gen) { + const HloInstruction* operand = scatter->operand(0); + const HloInstruction* scatter_indices = scatter->operand(1); + const HloInstruction* updates = scatter->operand(2); + const ScatterDimensionNumbers& dim_numbers = + scatter->scatter_dimension_numbers(); + CHECK(ShapeUtil::Equal(scatter->shape(), operand->shape())); + + auto loop_body_emitter = [&](const IrArray::Index& index) -> Status { + std::vector raw_window_multidim; + std::vector input_scatter_multidim; + std::vector raw_window_bounds; + + // Partition the index into window indices and scatter indices. + for (int64 i = 0, e = index.size(); i != e; ++i) { + // For window indices also remember the window size, this comes in handy + // later. 
+ if (absl::c_binary_search(dim_numbers.update_window_dims(), i)) { + raw_window_multidim.push_back(index[i]); + raw_window_bounds.push_back(updates->shape().dimensions(i)); + } else { + input_scatter_multidim.push_back(index[i]); + } + } + DCHECK_EQ(raw_window_multidim.size(), + dim_numbers.update_window_dims_size()); + + // Apply inserted_window_dims to the window dimensions. + int64 raw_window_multidim_idx = 0; + std::vector input_window_multidim; + std::vector input_window_bounds; + for (int64 i = 0, e = ShapeUtil::Rank(operand->shape()); i != e; ++i) { + if (absl::c_binary_search(dim_numbers.inserted_window_dims(), i)) { + input_window_bounds.push_back(1); // Trivial dimension. + input_window_multidim.push_back(index.GetConstantWithIndexType(0)); + } else { + input_window_bounds.push_back( + raw_window_bounds[raw_window_multidim_idx]); + input_window_multidim.push_back( + raw_window_multidim[raw_window_multidim_idx]); + ++raw_window_multidim_idx; + } + } + DCHECK_EQ(input_window_multidim.size(), ShapeUtil::Rank(operand->shape())); + + // Insert a 1 dimension at the end if index_vector_dim requests one. + Shape scatter_indices_shape = scatter_indices->shape(); + if (dim_numbers.index_vector_dim() == + ShapeUtil::Rank(scatter_indices_shape)) { + scatter_indices_shape.add_dimensions(1); + scatter_indices_shape.mutable_layout()->add_minor_to_major( + dim_numbers.index_vector_dim()); + } + + // Now load the indices corresponding to the current window from + // scatter_indices. + llvm_ir::IrArray::Index raw_scatter_index_index(input_scatter_multidim, + index.GetType()); + raw_scatter_index_index.InsertAt(dim_numbers.index_vector_dim(), nullptr); + llvm::Value* is_in_bounds = b_.getTrue(); + for (int64 i = 0, e = dim_numbers.scatter_dims_to_operand_dims_size(); + i != e; ++i) { + // Our index is stored along index_vector_dim, insert that into the lookup + // index into scatter_indices. 
+ raw_scatter_index_index[dim_numbers.index_vector_dim()] = + raw_scatter_index_index.GetConstantWithIndexType(i); + + int64 operand_dim = dim_numbers.scatter_dims_to_operand_dims(i); + TF_ASSIGN_OR_RETURN( + llvm::Value* const loaded_scatter_index, + scatter_indices_gen(raw_scatter_index_index.SourceIndexOfReshape( + scatter_indices_shape, scatter_indices->shape(), &b_))); + // And add the index to our window index. This yields the output index. + llvm::Value* casted_scatter_index = + IntCast(loaded_scatter_index, index.GetType(), + /*isSigned=*/true); + llvm::Value* dim_offset = + Add(input_window_multidim[operand_dim], casted_scatter_index); + input_window_multidim[operand_dim] = dim_offset; + + // Also do the bounds check now. + int64 max_index = operand->shape().dimensions(operand_dim) - + input_window_bounds[operand_dim] + 1; + // is_in_bounds = index >= 0 && index < dim_size-window_size+1 + // --> index u< dim_size-window_size+1 + is_in_bounds = + And(is_in_bounds, ICmpULT(casted_scatter_index, + index.GetConstantWithIndexType(max_index))); + } + + llvm_ir::LlvmIfData if_window_in_bounds_data = llvm_ir::EmitIfThenElse( + is_in_bounds, "scatter.in_bounds", &b_, /*emit_else=*/false); + llvm_ir::SetToFirstInsertPoint(if_window_in_bounds_data.true_block, &b_); + // All done, now just read from the calculated input from the window, and do + // an atomic store to the calculated location in the output. + llvm_ir::IrArray::Index input_window_index(input_window_multidim, + index.GetType()); + HloInstruction* output_hlo = + scatter->IsFused() ? 
scatter->parent()->FusionInstruction() : scatter; + llvm::Value* output_address = + GetIrArray(*output_hlo, *output_hlo) + .EmitArrayElementAddress(input_window_index, &b_); + llvm::Value* input_address = Alloca(llvm_ir::PrimitiveTypeToIrType( + updates->shape().element_type(), module_)); + TF_ASSIGN_OR_RETURN(llvm::Value* const input_ir_value, updates_gen(index)); + Store(input_ir_value, input_address); + return EmitAtomicOperationForNestedComputation( + *scatter->to_apply(), output_address, input_address); + }; + + // Launch a kernel that reads every element in the updates tensor. We could + // also do one kernel per window instead if bounds checks turn out to be a + // bottleneck. + LaunchDimensions launch_dimensions = CalculateLaunchDimensions( + updates->shape(), ir_emitter_context_->device_description()); + UpdateLaunchDimensions(launch_dimensions, thunk, + ir_emitter_context_->llvm_module()); + + return ParallelLoopEmitter(loop_body_emitter, updates->shape(), + launch_dimensions, &b_) + .EmitLoop(IrName(scatter), + GetIndexTypeForKernel(scatter, launch_dimensions.launch_bound(), + &b_)); +} + Status IrEmitterUnnested::HandleSelect(HloInstruction* select) { thunk_sequence_->push_back( BuildKernelThunk(select, /*implements_whole_instruction=*/true)); @@ -1966,34 +2194,34 @@ Status IrEmitterUnnested::HandleSelect(HloInstruction* select) { Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { std::vector> thunks; - auto keys = sort->operand(0); - auto values = sort->operand_count() > 1 ? 
sort->operand(1) : nullptr; - ShapeIndex keys_shape_index({}); - ShapeIndex values_shape_index({}); - if (values != nullptr) { - keys_shape_index = ShapeIndex({0}); - values_shape_index = ShapeIndex({1}); - } - auto keys_destination = GetAllocationSlice(*sort, keys_shape_index); - auto values_destination = GetAllocationSlice(*sort, values_shape_index); - - if (keys_destination != GetAllocationSlice(*keys)) { - thunks.push_back(absl::make_unique( - /*source_address=*/GetAllocationSlice(*keys), - /*destination_buffer=*/keys_destination, - /*mem_size=*/ShapeUtil::ByteSizeOf(keys->shape()), nullptr)); - } - if (values != nullptr && values_destination != GetAllocationSlice(*values)) { - // TODO(b/26783907): Figure out why we never seem to share buffers for - // key/value sort. - thunks.push_back(absl::make_unique( - /*source_address=*/GetAllocationSlice(*values), - /*destination_buffer=*/values_destination, - /*mem_size=*/ShapeUtil::ByteSizeOf(values->shape()), nullptr)); + Shape keys_shape = sort->operand(0)->shape(); + for (int64 i = 0; i < sort->operand_count(); ++i) { + ShapeIndex shape_index = + sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({}); + // We assume that the layout of all involved operands and outputs is the + // same. + TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(keys_shape, + sort->operand(i)->shape())); + TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual( + keys_shape, ShapeUtil::GetSubshape(sort->shape(), shape_index))); + + // If possible, we share buffers. If that is not possible, we need to copy + // the values, because the emitter does the sorting in-place. + auto destination_buffer = GetAllocationSlice(*sort, shape_index); + auto source_address = GetAllocationSlice(*sort->operand(i)); + if (destination_buffer != source_address) { + // TODO(b/26783907): Figure out why we never seem to share buffers for + // key/value sort. 
+ thunks.push_back(absl::make_unique( + /*source_address=*/source_address, + /*destination_buffer=*/destination_buffer, + /*mem_size=*/ShapeUtil::ByteSizeOf(sort->operand(i)->shape()), + nullptr)); + } } int64 dimension_to_sort = sort->dimensions(0); - int64 dimension_to_sort_bound = keys->shape().dimensions(dimension_to_sort); + int64 dimension_to_sort_bound = keys_shape.dimensions(dimension_to_sort); int64 num_stages = tensorflow::Log2Ceiling(dimension_to_sort_bound); auto index_type = b_.getInt64Ty(); @@ -2017,7 +2245,7 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { thunks.push_back( BuildKernelThunk(sort, /*implements_whole_instruction=*/false)); LaunchDimensions launch_dimensions = CalculateLaunchDimensions( - keys->shape(), ir_emitter_context_->device_description()); + keys_shape, ir_emitter_context_->device_description()); UpdateLaunchDimensions(launch_dimensions, thunks.back().get(), ir_emitter_context_->llvm_module()); @@ -2028,12 +2256,21 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) { xor_mask = llvm::ConstantInt::get(index_type, 1LL << mask); } + IrArray keys_array; + std::vector values_arrays; + values_arrays.reserve(sort->operand_count() - 1); + for (int64 i = 0; i < sort->operand_count(); ++i) { + ShapeIndex shape_index = + sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({}); + if (i == 0) { + keys_array = GetIrArray(*sort, *sort, shape_index); + } else { + values_arrays.push_back(GetIrArray(*sort, *sort, shape_index)); + } + } TF_RETURN_IF_ERROR(llvm_ir::EmitSortInPlace( - dimension_to_sort, GetIrArray(*sort, *sort, keys_shape_index), - values != nullptr ? 
absl::make_optional( - GetIrArray(*sort, *sort, values_shape_index)) - : absl::nullopt, - IrName(sort), xor_mask, &b_, &launch_dimensions)); + dimension_to_sort, keys_array, values_arrays, IrName(sort), xor_mask, + &b_, &launch_dimensions)); } } diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index bd5db7205155dc6b15ddea069e172bbd8f419996..93f11c069a4cebdf3c79cba17c824eded4f4b1db 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -76,6 +76,7 @@ class IrEmitterUnnested : public IrEmitter { Status HandleInfeed(HloInstruction* xla_infeed) override; Status HandleOutfeed(HloInstruction* outfeed) override; Status HandleRng(HloInstruction* random) override; + Status HandleScatter(HloInstruction* scatter) override; Status HandleSelect(HloInstruction* select) override; Status HandleSort(HloInstruction* sort) override; Status HandleTupleSelect(HloInstruction* tuple_select) override; @@ -184,6 +185,14 @@ class IrEmitterUnnested : public IrEmitter { absl::Span> extra_output_gens); + // Emits code for an in-place scatter, modifying `thunk`s launch dimensions in + // the process. `scatter` may be fused, scatter indices are taken from + // `scatter_indices_gen`, updates from`updates_gen`. The output buffer is + // expected to have the operand values in it already. + Status EmitScatter(Thunk* thunk, HloInstruction* scatter, + const llvm_ir::ElementGenerator& scatter_indices_gen, + const llvm_ir::ElementGenerator& updates_gen); + // Returns true if a 0-2-1 tiling algorithm is already used to emit the kernel // for the hlo instruction. 
bool CheckAndEmitHloWithTile021(HloInstruction* hlo); diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc index ac6c2c5565be3a469a539dde7a60e7fee3f09339..791d414c915e6f23d84a38ae99dcfa9a59ab6353 100644 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -38,9 +38,11 @@ limitations under the License. #include "tensorflow/compiler/xla/service/conditional_simplifier.h" #include "tensorflow/compiler/xla/service/flatten_call_graph.h" #include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_algorithm_picker.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_convolution_rewriter.h" -#include "tensorflow/compiler/xla/service/gpu/cudnn_fused_convolution_rewriter.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_algorithm_picker.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_pad_for_tensor_cores.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_padding_legalization.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_conv_rewriter.h" +#include "tensorflow/compiler/xla/service/gpu/cudnn_fused_conv_rewriter.h" #include "tensorflow/compiler/xla/service/gpu/fusion_merger.h" #include "tensorflow/compiler/xla/service/gpu/gpu_constants.h" #include "tensorflow/compiler/xla/service/gpu/gpu_copy_insertion.h" @@ -54,8 +56,6 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h" #include "tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/nvptx_backend_lib.h" #include "tensorflow/compiler/xla/service/gpu/multi_output_fusion.h" -#include "tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.h" -#include "tensorflow/compiler/xla/service/gpu/pad_insertion.h" #include "tensorflow/compiler/xla/service/gpu/partition_assignment.h" #include "tensorflow/compiler/xla/service/gpu/stream_assignment.h" #include "tensorflow/compiler/xla/service/gpu/stream_executor_util.h" @@ -75,7 +75,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" #include "tensorflow/compiler/xla/service/reduce_precision_insertion.h" #include "tensorflow/compiler/xla/service/reshape_mover.h" -#include "tensorflow/compiler/xla/service/scatter_expander.h" #include "tensorflow/compiler/xla/service/transpose_folding.h" #include "tensorflow/compiler/xla/service/tuple_simplifier.h" #include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h" @@ -176,8 +175,6 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, // elimination has to come after that pass. pipeline.AddPass(); - pipeline.AddPass(); - pass.AddPass( /*is_layout_sensitive=*/false, [](const Shape&, const Shape&) { return false; }); @@ -204,21 +201,22 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, { // Convert convolutions into CustomCalls to cudnn, then canonicalize them - // (PadInsertion). + // (CudnnConvPaddingLegalization). 
HloPassPipeline pipeline("conv_canonicalization"); pipeline.AddInvariantChecker(/*layout_sensitive=*/false, /*allow_mixed_precision=*/false); - pipeline.AddPass(); - pipeline.AddPass(); - pipeline.AddPass(); + pipeline.AddPass(); + pipeline.AddPass(); + pipeline.AddPass(); if (IsVoltaOrLater(*stream_exec)) { - pipeline.AddPass(); - // PadForTensorCores leaves behind unnecessary tuple/get-tuple-element - // pairs that TupleSimplifier fixes. + pipeline.AddPass(); + // CudnnConvPadForTensorCores leaves behind unnecessary + // tuple/get-tuple-element pairs that TupleSimplifier fixes. pipeline.AddPass(); } - // CudnnConvolutionRewriter, PadInsertion and PadForTensorCores may add - // instructions which can be simplified by constant folding. + // CudnnConvRewriter, CudnnConvPaddingLegalization and + // CudnnConvPadForTensorCores may add instructions which can be simplified + // by constant folding. pipeline.AddPass(); TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status()); } @@ -255,7 +253,7 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, // Choose the fastest algorithm for each conv. // // We pick the algorithm before fusion so we can generate better HLO. After - // CudnnConvolutionRewriter, our convolutions are CustomCalls which return a + // CudnnConvRewriter, our convolutions are CustomCalls which return a // tuple (conv_result, scratch_memory), and the each conv uses 0 bytes of // scratch: // @@ -273,12 +271,12 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, // The new tuple and gte instructions then be simplified away, because // nobody is expected to use the scratch value. // - // However, if we were to run CudnnConvolutionAlgorithmPicker after fusion + // However, if we were to run CudnnConvAlgorithmPicker after fusion // the gte(customcall, 0) would probably already be into a fusion node. 
We // can't simplify across HloComputation boundaries, so in this case we // wouldn't be able to simplify away the new_tuple bits. - pipeline.AddPass( - stream_exec, device_allocator, compiler); + pipeline.AddPass(stream_exec, device_allocator, + compiler); // Clean up new_tuple described above. pipeline.AddPass(); @@ -827,9 +825,8 @@ std::vector NVPTXCompiler::CompilePtxOrGetCachedResult(const string& ptx, } StatusOr>> -NVPTXCompiler::CompileAheadOfTime( - std::vector> module, - const AotCompilationOptions& options) { +NVPTXCompiler::CompileAheadOfTime(std::unique_ptr module_group, + const AotCompilationOptions& options) { return Unimplemented( "not yet implemented: NVPTXCompiler::CompileAheadOfTime"); } diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h index c4a0b727cd3d9ae0af61c1752c1608cd4fb65d2d..f79ae2990ae7d6e6985b15727a72358289121aa9 100644 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.h @@ -59,7 +59,7 @@ class NVPTXCompiler : public LLVMCompiler { DeviceMemoryAllocator* device_allocator) override; StatusOr>> - CompileAheadOfTime(std::vector> module, + CompileAheadOfTime(std::unique_ptr module_group, AotCompilationOptions const& options) override; se::Platform::Id PlatformId() const override; diff --git a/tensorflow/compiler/xla/service/gpu/tests/BUILD b/tensorflow/compiler/xla/service/gpu/tests/BUILD index a7255335672a3622d122e9fc5ebfab236a5ba895..d22ffc1754dfd43f9e5e0677553f26610f4b8112 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/BUILD +++ b/tensorflow/compiler/xla/service/gpu/tests/BUILD @@ -211,8 +211,8 @@ tf_cc_test( ) tf_cc_test( - name = "cudnn_fused_convolution_rewriter_test", - srcs = ["cudnn_fused_convolution_rewriter_test.cc"], + name = "cudnn_fused_conv_rewriter_test", + srcs = ["cudnn_fused_conv_rewriter_test.cc"], tags = tf_cuda_tests_tags(), deps = [ ":gpu_codegen_test", @@ -223,3 +223,15 @@ 
tf_cc_test( "@com_google_absl//absl/strings", ], ) + +tf_cc_test( + name = "gpu_atomic_test", + srcs = ["gpu_atomic_test.cc"], + tags = tf_cuda_tests_tags(), + deps = [ + ":gpu_codegen_test", + "//tensorflow/compiler/xla/tests:filecheck", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) diff --git a/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_convolution_rewriter_test.cc b/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc similarity index 93% rename from tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_convolution_rewriter_test.cc rename to tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc index 5632cac1862e21825888d94ab1eee5e1c9fd6800..8bdb4c8080aabe8cc324291ad9fc28b01d4eaf35 100644 --- a/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_convolution_rewriter_test.cc +++ b/tensorflow/compiler/xla/service/gpu/tests/cudnn_fused_conv_rewriter_test.cc @@ -22,7 +22,7 @@ namespace xla { namespace gpu { namespace { -class CudnnFusedConvolutionRewriterTest : public HloTestBase { +class CudnnFusedConvRewriterTest : public HloTestBase { protected: string GetOptimizedHlo(absl::string_view hlo_string) { return backend() @@ -66,7 +66,7 @@ class CudnnFusedConvolutionRewriterTest : public HloTestBase { } }; -TEST_F(CudnnFusedConvolutionRewriterTest, TestConvOnly) { +TEST_F(CudnnFusedConvRewriterTest, TestConvOnly) { // max(0, conv(x, w)); TestMatchWithAllTypes(R"( HloModule Test @@ -83,7 +83,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestConvOnly) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestBias) { +TEST_F(CudnnFusedConvRewriterTest, TestBias) { // max(0, conv(x, w) + bias); TestMatchWithAllTypes(R"( HloModule Test @@ -103,7 +103,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestBias) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestSideInputOnly) { +TEST_F(CudnnFusedConvRewriterTest, TestSideInputOnly) { // max(0, conv(x, w) + side_input); 
TestMatchWithAllTypes(R"( HloModule Test @@ -122,7 +122,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestSideInputOnly) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestBiasAndSideInput) { +TEST_F(CudnnFusedConvRewriterTest, TestBiasAndSideInput) { // max(0, conv(x, w) + side_input + bias); TestMatchWithAllTypes(R"( HloModule Test @@ -144,7 +144,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestBiasAndSideInput) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConv) { +TEST_F(CudnnFusedConvRewriterTest, TestScaledConv) { // max(0, 0.999994934 * conv(x, w)); TestMatchWithAllTypes(R"( HloModule Test @@ -164,7 +164,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConv) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConvAndSideInput) { +TEST_F(CudnnFusedConvRewriterTest, TestScaledConvAndSideInput) { // max(0, conv(x, w) + 0.899994934 * side_input); TestMatchWithAllTypes(R"( HloModule Test @@ -186,7 +186,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConvAndSideInput) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConvAndScaledSideInput) { +TEST_F(CudnnFusedConvRewriterTest, TestScaledConvAndScaledSideInput) { // max(0, 0.999994934 * conv(x, w) + 0.899994934 * side_input); TestMatchWithAllTypes(R"( HloModule Test @@ -211,8 +211,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestScaledConvAndScaledSideInput) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, - TestScaledConvAndScaledSideInputWithBias) { +TEST_F(CudnnFusedConvRewriterTest, TestScaledConvAndScaledSideInputWithBias) { // max(0, 0.999994934 * conv(x, w) + 0.899994934 * side_input + bias); TestMatchWithAllTypes(R"( HloModule Test @@ -240,7 +239,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestMatchMaxZeroOnly) { +TEST_F(CudnnFusedConvRewriterTest, TestMatchMaxZeroOnly) { // max(0.1, conv(x, w)) shouldn't match. 
TestNotMatchWithAllTypes(R"( HloModule Test @@ -257,7 +256,7 @@ TEST_F(CudnnFusedConvolutionRewriterTest, TestMatchMaxZeroOnly) { })"); } -TEST_F(CudnnFusedConvolutionRewriterTest, TestMatchBroadcastedBiasOnly) { +TEST_F(CudnnFusedConvRewriterTest, TestMatchBroadcastedBiasOnly) { // max(0, conv(x, w) + side_input1 + side_input2) shouldn't match. TestNotMatchWithAllTypes(R"( HloModule Test diff --git a/tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc b/tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..6b18c4c63714b4b3c06d7fa85f4a7a75b8e9ae12 --- /dev/null +++ b/tensorflow/compiler/xla/service/gpu/tests/gpu_atomic_test.cc @@ -0,0 +1,58 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include + +#include "tensorflow/compiler/xla/service/gpu/tests/gpu_codegen_test.h" +#include "tensorflow/compiler/xla/tests/filecheck.h" +#include "tensorflow/core/platform/test.h" + +namespace xla { +namespace gpu { +namespace { + +class GpuAtomicTest : public GpuCodegenTest {}; + +TEST_F(GpuAtomicTest, TestStore) { + const char* hlo_string = R"( + HloModule TensorFlowScatterV1 + + update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { + lhs = s32[] parameter(0) + ROOT rhs = s32[] parameter(1) + } + + ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2] parameter(1) + updates = s32[2,3] parameter(2) + ROOT scatter = s32[3,3] scatter(operand, indices, updates), + to_apply=update_s32, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 + } +)"; + + CompileAndVerifyIr(hlo_string, R"( +CHECK: store atomic{{.*}}unordered, align 4 +)"); +} + +} // namespace +} // namespace gpu +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index 1ea26ddd5b9ee01eaeb812b32539c7820d3d5dda..dbab62f847e8ca5e0b46dfd4162a0f4222640252 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -34,7 +34,7 @@ import "tensorflow/compiler/xla/xla_data.proto"; option cc_enable_arenas = true; // Serialization of HloInstruction. -// Next ID: 56 +// Next ID: 58 message HloInstructionProto { reserved 10; reserved "parameter_name"; @@ -184,6 +184,13 @@ message HloInstructionProto { // Sharding for kDomain instructions. xla.OpSharding domain_entry_sharding = 54; xla.OpSharding domain_exit_sharding = 55; + + // For custom call this indicates that the layouts are constrained. 
If + // constrain_layout is true then the 'shape' field must contain a layout, and + // 'operand_shapes_with_layout' must contain a shape with layout for each + // operand. + bool constrain_layout = 56; + repeated Shape operand_shapes_with_layout = 57; } // Serialization of HloComputation. @@ -218,6 +225,32 @@ message HloScheduleProto { map sequences = 1; } +message HloInputOutputAliasProto { + // The following proto describes a pair of aliased an input + // (described by parameter number and a ShapeIndex of the parameter) + // and an output (described by a ShapeIndex of the root + // instruction). For example: + // + // entry = { + // output_shape_index={1}, + // parameter_number=0, + // parameter_shape_index={1, 2}, + // } + // + // This entry indicates that the first paremter's {1, 2} element is + // aliased with the {1} element of the root instruction. + message AliasEntryProto { + // ShapeIndex of the root hlo. + repeated int64 output_shape_index = 1; + // Number of the parameter in entry computation. + int64 parameter_number = 2; + // ShapeIndex of the parameter instruction. + repeated int64 parameter_shape_index = 3; + } + + repeated AliasEntryProto entries = 1; +} + // Serialization of HloModule. message HloModuleProto { string name = 1; @@ -228,14 +261,17 @@ message HloModuleProto { // callees appear before their callers. repeated HloComputationProto computations = 3; - // The program shape (with layout) of the entry computation. - xla.ProgramShape program_shape = 4; + // The host program shape (with layout) of the entry computation. + xla.ProgramShape host_program_shape = 4; // The id of this module. int64 id = 5; // The schedule for this module. HloScheduleProto schedule = 7; + + // Describes alias information between inputs and outputs. + HloInputOutputAliasProto input_output_alias = 8; } // Serialization of LogicalBuffer. 
diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc index c3da12e273c77793647981f8653649155aac9483..cf8e6594cbe5ffd28ca75dd5006e8817f1e8581c 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc @@ -59,8 +59,9 @@ class BufferValueMap { // construction process. using BufferNumber = int64; - explicit BufferValueMap(const HloDataflowAnalysis& dataflow) - : dataflow_(dataflow) { + explicit BufferValueMap(HloModule* module, + const HloDataflowAnalysis& dataflow) + : module_(module), dataflow_(dataflow) { buffers_.reserve(dataflow_.values().size()); value_to_buffer_number_.reserve(dataflow_.values().size()); for (const HloValue* value : dataflow_.values()) { @@ -171,6 +172,42 @@ class BufferValueMap { return value_to_buffer_number_.at(&value); } + void ComputeInputOutputAliasedBuffers( + const HloValue& value, std::vector* aliased_buffers) { + // Get parameter value from an aliased_input object. + const auto get_parameter_value = + [this](const std::pair& aliased_input) + -> const HloValue& { + int64 param_number = aliased_input.first; + const ShapeIndex& param_index = aliased_input.second; + return dataflow_.GetUniqueValueAt( + module_->entry_computation()->parameter_instruction(param_number), + param_index); + }; + + // If the value shows up in a root instruction, alias it with parameter + // instruction. + for (const HloPosition& pos : value.positions()) { + if (pos.instruction == module_->entry_computation()->root_instruction()) { + ShapeIndex output_index = pos.index; + + auto aliased_input = + module_->input_output_alias_config().GetAliasedParameter( + output_index); + if (aliased_input) { + aliased_buffers->push_back( + GetBufferForValue(get_parameter_value(*aliased_input))); + } + } + } + + // If the value is parameter instruction itself, alias it with itself.
+ if (value.instruction()->opcode() == HloOpcode::kParameter && + value.instruction()->parent() == module_->entry_computation()) { + aliased_buffers->push_back(GetBufferForValue(value)); + } + } + void ComputeWhileAliasedBuffers(const HloValue& value, std::vector* aliased_buffers) { VLOG(3) << "Compute kWhile aliases"; @@ -278,6 +315,7 @@ class BufferValueMap { VLOG(2) << "Use of value " << value.ToShortString() << ": " << use; } std::vector aliased_buffers; + ComputeInputOutputAliasedBuffers(value, &aliased_buffers); ComputeWhileAliasedBuffers(value, &aliased_buffers); ComputeConditionalAliasedBuffers(value, &aliased_buffers); // Uniquify aliased buffers. @@ -288,6 +326,8 @@ class BufferValueMap { return aliased_buffers; } + HloModule* module_; + // Dataflow analysis used to construct the buffer map. const HloDataflowAnalysis& dataflow_; @@ -461,7 +501,7 @@ StatusOr> HloAliasAnalysis::Run( /*bitcast_defines_value=*/false, fusion_can_share_buffer)); - BufferValueMap buffer_map(alias_analysis->dataflow_analysis()); + BufferValueMap buffer_map(module, alias_analysis->dataflow_analysis()); buffer_map.MergeAliasedBuffers(); // Create a vector of HloBuffers, one for each set of values in the diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc index 0cd0ab36fcf832af9a71ab5837c94f9b39bc4bf3..5c8d97b2d15e15d15cb8014a7d25b37437ce8aec 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc @@ -217,6 +217,181 @@ TEST_F(HloAliasAnalysisTest, NondistinctTuple) { EXPECT_FALSE(AnyValuesInSameBufferInterfere()); } +TEST_F(HloAliasAnalysisTest, ParametersWithAliasing) { + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = 
builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + + auto negate0 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte0)); + auto negate1 = builder.AddInstruction( + HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kNegate, gte1)); + + auto tuple = + builder.AddInstruction(HloInstruction::CreateTuple({negate0, negate1})); + module_->AddEntryComputation(builder.Build()); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + + // Cannot alias an output twice. + ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0})); + + const HloAliasAnalysis& analysis = RunAnalysis(); + + EXPECT_EQ(analysis.GetUniqueBufferAt(gte0), + analysis.GetUniqueBufferAt(tuple, /*index=*/{0})); + + EXPECT_EQ(analysis.GetUniqueBufferAt(gte1), + analysis.GetUniqueBufferAt(tuple, /*index=*/{1})); +} + +TEST_F(HloAliasAnalysisTest, ParametersWithCrossAliasing) { + // parameter 0 aliased with output 1 and parameter 1 aliased with output 0. 
+ // + // (p0 , p1) + // \ / + // \ / + // alias X + // / \ + // / \ + // (p0 , p1) + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + auto gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 0)); + auto gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, param, 1)); + auto tuple = + builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte1})); + module_->AddEntryComputation(builder.Build()); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{1})); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{0})); + + // Cannot alias an output twice. + ASSERT_IS_NOT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + + const HloAliasAnalysis& analysis = RunAnalysis(); + + // All ops in this graph are aliased with each other. + EXPECT_EQ(analysis.GetUniqueBufferAt(gte0), + analysis.GetUniqueBufferAt(tuple, /*index=*/{0})); + EXPECT_EQ(analysis.GetUniqueBufferAt(gte0), + analysis.GetUniqueBufferAt(tuple, /*index=*/{1})); + + EXPECT_EQ(analysis.GetUniqueBufferAt(gte1), + analysis.GetUniqueBufferAt(tuple, /*index=*/{0})); + EXPECT_EQ(analysis.GetUniqueBufferAt(gte1), + analysis.GetUniqueBufferAt(tuple, /*index=*/{1})); +} + +TEST_F(HloAliasAnalysisTest, InputOutputAliasingWithWhile) { + // Test a simple single while instruction can be aliased with input and output + // of the computation.
+ // + // body((F32[], F32[]) %tuple_param): + // %add = Add(%tuple_param{0}, %tuple_param{1}) + // return Tuple(%tuple_param{0}, %add) + // + // condition((F32[], F32[]) %tuple_param): + // return Constant(false) + // + // entry: + // %param1 = param1 + // %while = While(%param1, body, condition) + // %while_1 = GTE(%while, 0) + // %while_2 = GTE(%while, 1) + // %negate_1 = Negate(%while_1) + // %negate_2 = Negate(%while_2) + // return Tuple(negate_1, negate_2) + // + const Shape tuple_shape = + ShapeUtil::MakeTupleShape({scalar_shape_, scalar_shape_}); + + // Element 0 passes transparently through the body. + auto body_builder = HloComputation::Builder("body"); + auto body_param = body_builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "param")); + auto body_element_0 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 0)); + auto body_element_1 = body_builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, body_param, 1)); + auto add = body_builder.AddInstruction(HloInstruction::CreateBinary( + scalar_shape_, HloOpcode::kAdd, body_element_0, body_element_1)); + auto body_tuple = body_builder.AddInstruction( + HloInstruction::CreateTuple({body_element_0, add})); + HloComputation* body = module_->AddEmbeddedComputation(body_builder.Build()); + + // Condition computation trivially returns a constant "false". 
+ auto cond_builder = HloComputation::Builder("condition"); + auto cond_param = cond_builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "param")); + cond_builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(false))); + HloComputation* condition = + module_->AddEmbeddedComputation(cond_builder.Build()); + + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, tuple_shape, "p0")); + + auto xla_while = builder.AddInstruction( + HloInstruction::CreateWhile(tuple_shape, condition, body, param)); + auto while_element_1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 0)); + auto while_element_2 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(scalar_shape_, xla_while, 1)); + auto negate_1 = builder.AddInstruction(HloInstruction::CreateUnary( + scalar_shape_, HloOpcode::kNegate, while_element_1)); + auto negate_2 = builder.AddInstruction(HloInstruction::CreateUnary( + scalar_shape_, HloOpcode::kNegate, while_element_2)); + auto tuple = + builder.AddInstruction(HloInstruction::CreateTuple({negate_1, negate_2})); + module_->AddEntryComputation(builder.Build()); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{0}, /*param_number=*/0, /*param_index=*/{0})); + TF_ASSERT_OK(module_->input_output_alias_config().SetUpAlias( + /*output_index=*/{1}, /*param_number=*/0, /*param_index=*/{1})); + + const HloAliasAnalysis& analysis = RunAnalysis(); + + EXPECT_THAT( + GetValuesInBuffer(analysis.GetUniqueBufferAt(xla_while, /*index=*/{1})), + UnorderedElementsAre(GetValueDefinedAt(param, {1}), + GetValueDefinedAt(xla_while, /*index=*/{1}), + GetValueDefinedAt(body_param, {1}), + GetValueDefinedAt(cond_param, {1}), + GetValueDefinedAt(add), + GetValueDefinedAt(negate_2))); + + EXPECT_THAT( + analysis.GetUniqueBufferAt(xla_while, /*index=*/{1}).ComputePositions(), + 
UnorderedElementsAre( + HloPosition{param, {1}}, HloPosition{xla_while, {1}}, + HloPosition{while_element_2, {}}, HloPosition{body_param, {1}}, + HloPosition{body_element_1, {}}, HloPosition{add, {}}, + HloPosition{body_tuple, {1}}, HloPosition{tuple, {1}}, + HloPosition{cond_param, {1}}, HloPosition{negate_2, {}})); + + EXPECT_FALSE(AnyValuesInSameBufferInterfere()); +} + TEST_F(HloAliasAnalysisTest, SingleCall) { // Test a single call of a subcomputation. The subcomputation adds its two // array-shaped parameters. diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index c2041c466708fd8c88d34f14fbc0064905f594a9..b0f7cd91ad1db0a59c09cfbfc1885813dc57e01e 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -215,7 +215,7 @@ Status HloComputation::RemoveInstructionAndUnusedOperands( if (removed.count(item) != 0 || item->user_count() != 0 || item == root_instruction() || !IsRemovable(item) || - item->HasSideEffect()) { + (item->HasSideEffect() && item != instruction)) { continue; } for (int i = 0; i < item->operand_count(); ++i) { diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index d87ab4bda162a74421e8906e07cfcb97e2128fe4..dec96d11a93cf56d3c40a6bb7882ffb7336aeeb0 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -128,9 +128,10 @@ class HloComputation { // users. Instruction is deallocated with this call. Status RemoveInstruction(HloInstruction* instruction); - // Remove an instruction from the computation and also transitively any - // operand that has no users post removing an instruction. The instruction - // must have no users. Instruction is deallocated with this call. 
+ // Remove an instruction (including side effecting ones) from the computation + // and also transitively any operand that has no side effect and no users post + // removing an instruction. The instruction must have no users. Instruction is + // deallocated with this call. Status RemoveInstructionAndUnusedOperands(HloInstruction* instruction); // Set the root of the computation to the given instruction. The instruction diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index a502fff9a0f1e40065746f2193bf76b1adefdb31..23ab4cda93fc5d6979308bdf9a87f0a16d465154 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -664,6 +664,11 @@ Status HloCostAnalysis::HandleConditional(const HloInstruction* conditional) { } Status HloCostAnalysis::HandleGather(const HloInstruction* gather) { + // Gather doesn't read the whole input buffer, it's equivalent to a copy the + // size of the output shape and a read of the gather indices. + current_properties_[kBytesAccessedKey] = + GetShapeSize(gather->shape()) * 2 + + GetShapeSize(gather->operand(1)->shape()); // Gather does not issue any flops. return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc index d76ce9ecbca67ae3bc3db4ee2452f30ccec5b88b..802cdfc9e454cf05db18fad9bc7f44fdc146a92e 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis_test.cc @@ -556,5 +556,30 @@ TEST_F(HloCostAnalysisTest, DynamicUpdateSlice) { EXPECT_EQ(analysis.bytes_accessed(), 8); } +TEST_F(HloCostAnalysisTest, Gather) { + // Test the analysis on a gather. 
+ XlaBuilder builder("gather"); + Shape operand_shape = ShapeUtil::MakeShape(S32, {3, 3}); + Shape indices_shape = ShapeUtil::MakeShape(S32, {2}); + + auto operand = Parameter(&builder, 0, operand_shape, "operand"); + auto indices = Parameter(&builder, 1, indices_shape, "indices"); + GatherDimensionNumbers dim_numbers; + dim_numbers.add_offset_dims(1); + dim_numbers.add_collapsed_slice_dims(0); + dim_numbers.add_start_index_map(0); + dim_numbers.set_index_vector_dim(1); + Gather(operand, indices, dim_numbers, {1, 3}); + + auto hlo_module = BuildHloGraph(&builder); + + // Run HLO cost analysis. + HloCostAnalysis analysis(ShapeSize); + ASSERT_IS_OK( + hlo_module->entry_computation()->root_instruction()->Accept(&analysis)); + + EXPECT_EQ(analysis.bytes_accessed(), 56); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index c22adcdd8dd936eebca3a8f0d85b1254401b5ef4..5dcf6bc985ff18fa6fc1ab5a5692914b4597d065 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -126,7 +126,7 @@ bool HloDataflowAnalysis::ValueIsDefinedAt(const HloInstruction* instruction, const HloValue& HloDataflowAnalysis::GetValueDefinedAt( const HloInstruction* instruction, const ShapeIndex& index) const { - CHECK(ValueIsDefinedAt(instruction, index)); + CHECK(ValueIsDefinedAt(instruction, index)) << instruction->ToString(); return GetUniqueValueAt(instruction, index); } @@ -1048,6 +1048,7 @@ bool HloDataflowAnalysis::CanShareOperandBufferWithUser( } if (user->opcode() == HloOpcode::kDynamicUpdateSlice || + user->opcode() == HloOpcode::kScatter || user->opcode() == HloOpcode::kWhile) { // We eliminated other users in BufferLiveness::live_range_strictly_before, // so here we just need to check that the use is at operand index 0. 
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index 510d6360a1cf94ef06d2ed919a57c7a825886834..909853106d57d181e85e3e4134b4039be2b176f5 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -2283,6 +2283,44 @@ TEST_F(CanShareOperandBufferWithUserTest, DynamicUpdateSliceCanShare) { dataflow_analysis_->CanShareOperandBufferWithUser(starts, {}, dus, {})); } +TEST_F(CanShareOperandBufferWithUserTest, ScatterCanShare) { + const char* hlo_text = R"( + HloModule TensorFlowScatterV1 + + update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { + lhs = s32[] parameter(0) + ROOT rhs = s32[] parameter(1) + } + + ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2] parameter(1) + updates = s32[2,3] parameter(2) + ROOT scatter = s32[3,3] scatter(operand, indices, updates), + to_apply=update_s32, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 + } + )"; + TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text)); + computation_ = module_->entry_computation(); + RunAnalysis(); + + HloInstruction* operand_param = computation_->parameter_instruction(0); + HloInstruction* indices_param = computation_->parameter_instruction(1); + HloInstruction* updates_param = computation_->parameter_instruction(2); + HloInstruction* scatter = computation_->root_instruction(); + + EXPECT_TRUE(dataflow_analysis_->CanShareOperandBufferWithUser( + operand_param, {}, scatter, {})); + EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser( + indices_param, {}, scatter, {})); + EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser( + updates_param, {}, scatter, {})); +} + TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) { auto builder = HloComputation::Builder(TestName()); @@ -2308,7 +2346,8 @@ TEST_F(CanShareOperandBufferWithUserTest, 
SortCanShareWithTupleUser) { auto values = builder.AddInstruction( HloInstruction::CreateParameter(1, values_shape, "values")); auto sort = builder.AddInstruction(HloInstruction::CreateSort( - ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, values)); + ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, + {values})); BuildModuleAndRunAnalysis(builder.Build()); diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.cc b/tensorflow/compiler/xla/service/hlo_domain_map.cc index 6ca1255edec377cf0738a1ad2596cb06aa1c2c6f..c6d02f9f67bb599e496d20fc2acf2e627ed54438 100644 --- a/tensorflow/compiler/xla/service/hlo_domain_map.cc +++ b/tensorflow/compiler/xla/service/hlo_domain_map.cc @@ -42,18 +42,19 @@ namespace xla { return std::move(domain_map); } -bool HloDomainMap::InSameDomain(HloInstruction* instruction1, - HloInstruction* instruction2) const { +bool HloDomainMap::InSameDomain(const HloInstruction* instruction1, + const HloInstruction* instruction2) const { int64 domain_id1 = GetDomainId(instruction1); int64 domain_id2 = GetDomainId(instruction2); return domain_id1 >= 0 && domain_id1 == domain_id2; } -int64 HloDomainMap::GetDomainId(HloInstruction* instruction) const { +int64 HloDomainMap::GetDomainId(const HloInstruction* instruction) const { return FindOrDefault(instruction_to_domain_, instruction, -1); } -int64 HloDomainMap::GetDomainMetadataId(HloInstruction* instruction) const { +int64 HloDomainMap::GetDomainMetadataId( + const HloInstruction* instruction) const { return FindOrDie(domain_metadata_id_, instruction); } @@ -200,7 +201,8 @@ StatusOr> HloDomainMap::CreateDomain( return std::move(domain); } -bool HloDomainMap::IsDomainInstruction(HloInstruction* instruction) const { +bool HloDomainMap::IsDomainInstruction( + const HloInstruction* instruction) const { if (instruction->opcode() != HloOpcode::kDomain) { return false; } diff --git a/tensorflow/compiler/xla/service/hlo_domain_map.h 
b/tensorflow/compiler/xla/service/hlo_domain_map.h index c8d581b74677674ed8682ecc1fa022cea890a649..bce7d1aa7cf1822ef1608674e7bf9483c628e4b5 100644 --- a/tensorflow/compiler/xla/service/hlo_domain_map.h +++ b/tensorflow/compiler/xla/service/hlo_domain_map.h @@ -58,21 +58,21 @@ class HloDomainMap { } // Checks whether two instructions are within the same domain. - bool InSameDomain(HloInstruction* instruction1, - HloInstruction* instruction2) const; + bool InSameDomain(const HloInstruction* instruction1, + const HloInstruction* instruction2) const; // Checks whether instruction is a kDomain instruction of the kind we are // currently processing. - bool IsDomainInstruction(HloInstruction* instruction) const; + bool IsDomainInstruction(const HloInstruction* instruction) const; // Retrieves the domain identifier of the instruction, or -1 in case // instruction is not found within any domain. - int64 GetDomainId(HloInstruction* instruction) const; + int64 GetDomainId(const HloInstruction* instruction) const; // Returns the unique id of the domain metadata for the domain the given // instruction belongs to. The given instruction must not be a kDomain // instruction since each domain instruction is associated with 2 domains. - int64 GetDomainMetadataId(HloInstruction* instruction) const; + int64 GetDomainMetadataId(const HloInstruction* instruction) const; private: // Map used for representing instruction ordering, i.e. 
@@ -119,8 +119,8 @@ class HloDomainMap { string domain_kind_; std::vector> instruction_domains_; - absl::flat_hash_map instruction_to_domain_; - absl::flat_hash_map domain_metadata_id_; + absl::flat_hash_map instruction_to_domain_; + absl::flat_hash_map domain_metadata_id_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index eec8d242faaa70e84ab5b46904b0a0ea41d5b009..c2998883851481b3cda5a3423baa3454018117b2 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -24,6 +24,7 @@ limitations under the License. #include #include "absl/algorithm/container.h" +#include "absl/container/inlined_vector.h" #include "absl/memory/memory.h" #include "absl/strings/string_view.h" #include "tensorflow/compiler/xla/index_util.h" @@ -189,6 +190,11 @@ HloEvaluator::HloEvaluator(int64 max_loop_iterations) return Unimplemented( "HloEvaluatorTypedVisitor: unhandled primitive type: OPAQUE."); }); + typed_visitors_[TOKEN] = + absl::make_unique([](HloInstruction*) { + return Unimplemented( + "HloEvaluatorTypedVisitor: unhandled primitive type: TOKEN."); + }); } template @@ -1228,7 +1234,7 @@ StatusOr EvaluateSortInternal(HloInstruction* sort, TF_RET_CHECK( ShapeUtil::SameDimensions(keys_literal.shape(), values_literal.shape())) << "Sort keys and values must have the same dimensions"; - TF_RET_CHECK(sort->operand_count() == 2) << "Expected key-value sort"; + TF_RET_CHECK(sort->operand_count() >= 2) << "Expected key-value sort"; // We need to sort an array of keys and an array of values, where the // sorted order of the values is determined by the keys. The simplest(?) 
// way to do this is to go to an array-of-pairs representation, sort the @@ -1279,7 +1285,9 @@ StatusOr EvaluateSortInternal(HloInstruction* sort, return SafeLess(a.first, b.first); }); std::vector result_keys; - std::vector result_values; + // We use an InlinedVector here because we need to convert it to an + // absl::Span later, and this would not work with std::vector. + absl::InlinedVector result_values; for (const auto& key_value : key_value_vector) { result_keys.push_back(key_value.first); result_values.push_back(key_value.second); @@ -1315,7 +1323,10 @@ template StatusOr EvaluateSortCurried(HloInstruction* sort, const Literal& keys_literal, const Literal& values_literal) { - switch (sort->operand(1)->shape().element_type()) { + switch (values_literal.shape().element_type()) { + case PRED: + return EvaluateSortInternal(sort, keys_literal, + values_literal); case F32: return EvaluateSortInternal(sort, keys_literal, values_literal); @@ -1355,14 +1366,24 @@ Status HloEvaluator::HandleSort(HloInstruction* sort) { if (!ShapeUtil::IsTuple(sort->shape())) { return DefaultAction(sort); } else { - auto result = EvaluateSort(sort, GetEvaluatedLiteralFor(sort->operand(0)), - GetEvaluatedLiteralFor(sort->operand(1))); - if (result.ok()) { - evaluated_[sort] = std::move(result.ValueOrDie()); - return Status::OK(); - } else { - return result.status(); + // This is a really stupid work-around for the fact it's hard to support a + // multi-value sort directly, due to the fact we need to template the + // evaluation function on all of the value types.
+ std::vector sort_results_backing; + for (int64 i = 0; i < sort->operand_count(); ++i) { + auto result = EvaluateSort(sort, GetEvaluatedLiteralFor(sort->operand(0)), + GetEvaluatedLiteralFor(sort->operand(i))); + if (!result.ok()) { + return result.status(); + } + sort_results_backing.push_back( + std::move(result.ValueOrDie().DecomposeTuple()[1])); } + std::vector sort_results; + absl::c_transform(sort_results_backing, std::back_inserter(sort_results), + [](const Literal& literal) { return &literal; }); + evaluated_[sort] = LiteralUtil::MakeTuple(sort_results); + return Status::OK(); } } diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index cee11a8a2166f96ae801095b6364921ed05d0000..608a42bb60702aa075daca39535ca1672dcc5467 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -1463,6 +1463,58 @@ TEST_P(HloEvaluatorTest, ReduceWindowMax) { EXPECT_TRUE(LiteralTestUtil::Equal(expected, result)); } +TEST_P(HloEvaluatorTest, ReduceWindowMaxWindowDilation) { + HloComputation::Builder b(TestName()); + + // arg: + // f32[3,3] { + // { 1, 2, 3 }, + // { 5, 6, 7 }, + // { 9, 10, 11 }, + // } + auto arg_array = absl::make_unique>(3, 3); + arg_array->FillUnique(1.0f); + auto arg_literal = LiteralUtil::CreateR2FromArray2D(*arg_array); + + HloInstruction* arg_instruction = + b.AddInstruction(HloInstruction::CreateConstant(std::move(arg_literal))); + + auto init_value = b.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(0.f))); + + HloComputation::Builder max_computation("max"); + Shape scalar_shape = ShapeUtil::MakeShape(F32, {}); + auto param_lhs = max_computation.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "lhs")); + auto param_rhs = max_computation.AddInstruction( + HloInstruction::CreateParameter(1, scalar_shape, "rhs")); + 
max_computation.AddInstruction(HloInstruction::CreateBinary( + scalar_shape, HloOpcode::kMaximum, param_lhs, param_rhs)); + auto max_func = module().AddEmbeddedComputation(max_computation.Build()); + + Window window; + WindowDimension dim; + dim.set_size(2); + dim.set_stride(1); + dim.set_padding_low(0); + dim.set_padding_high(0); + dim.set_window_dilation(2); + dim.set_base_dilation(1); + *window.add_dimensions() = dim; + *window.add_dimensions() = dim; + + Shape shape = ShapeUtil::MakeShape(F32, {1, 1}); + b.AddInstruction(HloInstruction::CreateReduceWindow( + shape, arg_instruction, init_value, window, max_func)); + + module().AddEntryComputation(b.Build()); + + Literal result = Evaluate(); + + auto expected = LiteralUtil::CreateR2({{11}}); + EXPECT_TRUE(LiteralTestUtil::Equal(expected, result)); +} + TEST_P(HloEvaluatorTest, ReduceWindowAdd) { HloComputation::Builder b(TestName()); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h index b2d12c94b848e4fd8ae473fdc0e4a9f5fecf6286..84fbbd3e0c3ddb704b8db601897f3b199dc99626 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator_typed_visitor.h @@ -1072,66 +1072,66 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { // Convolve input feature with kernel. do { + // Find corresponding spatial dimension index for input (lhs). + int64 lhs_linear_spatial_index = 0; + int64 rhs_linear_spatial_index = 0; + for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) { + // Spatial dimension number for input (lhs) and output. + const int64 input_spatial_dim = dnums.input_spatial_dimensions(ki); + const int64 output_spatial_dim = dnums.output_spatial_dimensions(ki); + + // Calculate lhs (input) index without taking base dilation into + // account. 
+ const auto& window_dim = window.dimensions(ki); + const int64 undilated_index = + out_index[output_spatial_dim] * window_dim.stride() - + window_dim.padding_low() + + rhs_spatial_index[ki] * window_dim.window_dilation(); + // Skip if the lhs (input) index is to be dilated. As an + // optimization, skip this mod if there's no dilation. + if (window_dim.base_dilation() > 1 && + undilated_index % window_dim.base_dilation() != 0) { + goto cnt; + } + + // Calculate the actual lhs (input) index after dilation. As an + // optimization, skip this integer divide if there's no dilation. + int64 lhs_spatial_index; + if (window_dim.base_dilation() > 1) { + lhs_spatial_index = undilated_index / window_dim.base_dilation(); + } else { + lhs_spatial_index = undilated_index; + } + + // Skip if input index is not in bounds. + if (!(lhs_spatial_index >= 0 && + lhs_spatial_index < lhs_shape.dimensions(input_spatial_dim))) { + goto cnt; + } + + lhs_linear_spatial_index += + lhs_spatial_index * lhs_dim_multipliers[input_spatial_dim]; + rhs_linear_spatial_index += + (window_dim.window_reversal() + ? ((window_dim.size() - 1) - rhs_spatial_index[ki]) + : rhs_spatial_index[ki]) * + rhs_dim_multipliers[dnums.kernel_spatial_dimensions(ki)]; + } + for (int64 rhs_iz = 0; rhs_iz < input_feature_group_size; ++rhs_iz) { const int64 iz = feature_group_index * input_feature_group_size + rhs_iz; - int64 lhs_linear_index = 0; + int64 lhs_linear_index = lhs_linear_spatial_index; lhs_linear_index += out_index[output_batch_dim] * lhs_dim_multipliers[input_batch_dim]; lhs_linear_index += iz * lhs_dim_multipliers[input_z_dim]; - int64 rhs_linear_index = 0; + int64 rhs_linear_index = rhs_linear_spatial_index; rhs_linear_index += out_index[output_z_dim] * rhs_dim_multipliers[kernel_output_z_dim]; rhs_linear_index += rhs_iz * rhs_dim_multipliers[kernel_input_z_dim]; - // Find corresponding spatial dimension index for input (lhs). 
- for (int64 ki = 0; ki < rhs_spatial_index.size(); ++ki) { - // Spatial dimension number for input (lhs) and output. - const int64 input_spatial_dim = dnums.input_spatial_dimensions(ki); - const int64 output_spatial_dim = - dnums.output_spatial_dimensions(ki); - - // Calculate lhs (input) index without taking base dilation into - // account. - const auto& window_dim = window.dimensions(ki); - const int64 undilated_index = - out_index[output_spatial_dim] * window_dim.stride() - - window_dim.padding_low() + - rhs_spatial_index[ki] * window_dim.window_dilation(); - // Skip if the lhs (input) index is to be dilated. As an - // optimization, skip this mod if there's no dilation. - if (window_dim.base_dilation() > 1 && - undilated_index % window_dim.base_dilation() != 0) { - goto cnt; - } - - // Calculate the actual lhs (input) index after dilation. As an - // optimization, skip this integer divide if there's no dilation. - int64 lhs_spatial_index; - if (window_dim.base_dilation() > 1) { - lhs_spatial_index = undilated_index / window_dim.base_dilation(); - } else { - lhs_spatial_index = undilated_index; - } - lhs_linear_index += - lhs_spatial_index * lhs_dim_multipliers[input_spatial_dim]; - - // Skip if input index is not in bounds. - if (!(lhs_spatial_index >= 0 && - lhs_spatial_index < - lhs_shape.dimensions(input_spatial_dim))) { - goto cnt; - } - - rhs_linear_index += - (window_dim.window_reversal() - ? 
((window_dim.size() - 1) - rhs_spatial_index[ki]) - : rhs_spatial_index[ki]) * - rhs_dim_multipliers[dnums.kernel_spatial_dimensions(ki)]; - } - result_val += static_cast(lhs_literal_data[lhs_linear_index]) * static_cast(rhs_literal_data[rhs_linear_index]); @@ -2613,8 +2613,17 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault { std::vector base_index(rank); bool out_of_bound = false; for (int64 i = 0; i < rank; ++i) { - base_index[i] = window_count_index[i] * window.dimensions(i).stride() + - window_index[i] - window.dimensions(i).padding_low(); + base_index[i] = + window_count_index[i] * window.dimensions(i).stride() + + window_index[i] * window.dimensions(i).window_dilation() - + window.dimensions(i).padding_low(); + // We are not in the base area if the dilation placed us out of bounds. + if (base_index[i] % window.dimensions(i).base_dilation() != 0) { + out_of_bound = true; + break; + } + // Apply the dilation to the base area. + base_index[i] /= window.dimensions(i).base_dilation(); if (base_index[i] < 0 || base_index[i] >= base_shape.dimensions(i)) { out_of_bound = true; break; diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc new file mode 100644 index 0000000000000000000000000000000000000000..8128fad07ca0b9c3883ed93c6e1c8e977e990cb4 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.cc @@ -0,0 +1,182 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" + +namespace xla { +Status HloInputOutputAliasConfig::SetUpAlias(const ShapeIndex& output_index, + int64 param_number, + const ShapeIndex& param_index) { + TF_RET_CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index)) + << absl::StrCat("Tring to set up alias at ", output_index.ToString(), + " which is an invalid index for shape ", + ShapeUtil::HumanString(alias_.shape())); + // Output can't be aliased with multiple parameters. + TF_RET_CHECK(!alias_.element(output_index)) << absl::StrFormat( + "Trying to set up output alias for param %lld at %s but failed: output " + "index %s is already aliased with param %lld at %s", + param_number, param_index.ToString(), output_index.ToString(), + alias_.element(output_index)->first, + alias_.element(output_index)->second.ToString()); + (*alias_.mutable_element(output_index)) = + std::make_pair(param_number, param_index); + return Status::OK(); +} + +HloInputOutputAliasProto HloInputOutputAliasConfig::ToProto() const { + HloInputOutputAliasProto result; + alias_.ForEachElement( + [&](const ShapeIndex& index, + const absl::optional>& data) { + if (data) { + HloInputOutputAliasProto::AliasEntryProto entry; + for (int64 i : index) { + entry.add_output_shape_index(i); + } + entry.set_parameter_number(data->first); + for (int64 i : data->second) { + entry.add_parameter_shape_index(i); + } + result.add_entries()->Swap(&entry); + } + }); + return result; +} + +StatusOr HloInputOutputAliasConfig::CreateFromProto( + const Shape& output_shape, const HloInputOutputAliasProto& proto) { + HloInputOutputAliasConfig result(output_shape); + for (const HloInputOutputAliasProto::AliasEntryProto& entry : + proto.entries()) { + 
ShapeIndex output_index(entry.output_shape_index().begin(), + entry.output_shape_index().end()); + + int64 param_number = entry.parameter_number(); + ShapeIndex param_index(entry.parameter_shape_index().begin(), + entry.parameter_shape_index().end()); + TF_RETURN_IF_ERROR( + result.SetUpAlias(output_index, param_number, param_index)); + } + + return result; +} + +string HloInputOutputAliasConfig::ToString() const { + std::vector pieces; + pieces.push_back("HloInputOutputAliasConfig"); + + ForEachAlias([&](const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index) { + pieces.push_back(absl::StrFormat( + " OutputIndex %s is aliased with parameter %lld at %s:", + output_index.ToString(), param_number, param_index.ToString())); + }); + + return absl::StrJoin(pieces, "\n"); +} + +bool HloInputOutputAliasConfig::ParameterHasAlias( + int64 param_number, const ShapeIndex& param_index) const { + bool output = false; + alias_.ForEachElement( + [&](const xla::ShapeIndex&, + absl::optional> alias) { + if (alias && alias->first == param_number && + alias->second == param_index) { + output = true; + } + }); + return output; +} + +absl::optional HloInputOutputAliasConfig::GetAliasedOutput( + int64 param_number, const ShapeIndex& param_index) const { + absl::optional output; + alias_.ForEachElement( + [&](const xla::ShapeIndex& output_index, + absl::optional> alias) { + if (alias && alias->first == param_number && + alias->second == param_index) { + output = output_index; + } + }); + return output; +} + +absl::optional> +HloInputOutputAliasConfig::GetAliasedParameter( + const ShapeIndex& output_index) const { + CHECK(ShapeUtil::IndexIsValid(alias_.shape(), output_index)); + return alias_.element(output_index); +} + +void HloInputOutputAliasConfig::ForEachAlias(AliasFn fn) const { + alias_.ForEachElement( + [&](const ShapeIndex& output_index, + absl::optional> aliased) { + if (aliased) { + fn(output_index, aliased->first, aliased->second); + } + }); +} + 
+Status HloInputOutputAliasConfig::ForEachAliasWithStatus( + AliasFnWithStatus fn) const { + return alias_.ForEachElementWithStatus( + [&](const ShapeIndex& output_index, + absl::optional> aliased) { + if (aliased) { + TF_RETURN_IF_ERROR(fn(output_index, aliased->first, aliased->second)); + } + return Status::OK(); + }); +} + +Status HloInputOutputAliasConfig::Verify(const HloModule& module) const { + std::vector> param_has_seen; + const HloComputation* entry = module.entry_computation(); + for (int64 i = 0; i < entry->num_parameters(); ++i) { + HloInstruction* param = entry->parameter_instruction(i); + param_has_seen.emplace_back(param->shape()); + } + return ForEachAliasWithStatus([&](const ShapeIndex& output_index, + int64 param_number, + const ShapeIndex& param_index) -> Status { + const HloInstruction* root = entry->root_instruction(); + + const Shape& param_shape = + entry->parameter_instruction(param_number)->shape(); + const Shape& output_shape = root->shape(); + TF_RET_CHECK(entry->num_parameters() > param_number); + TF_RET_CHECK(ShapeUtil::IndexIsValid(param_shape, param_index)); + TF_RET_CHECK(ShapeUtil::IndexIsValid(output_shape, output_index)); + + // Check each param_number and param_index pair only show up once. No + // input can be aliased with output buffers. 
+ TF_RET_CHECK(param_has_seen[param_number].element(param_index) == false); + + *(param_has_seen[param_number].mutable_element(param_index)) = true; + + return Status::OK(); + }); +} + +std::ostream& operator<<(std::ostream& out, + const HloInputOutputAliasConfig& config) { + out << config.ToString(); + return out; +} +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h new file mode 100644 index 0000000000000000000000000000000000000000..0fae75842ba28da5dcb59e5952cd60c1d1c5ea68 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config.h @@ -0,0 +1,102 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_ + +#include + +#include "absl/types/optional.h" +#include "tensorflow/compiler/xla/service/hlo.pb.h" +#include "tensorflow/compiler/xla/shape_tree.h" +#include "tensorflow/compiler/xla/shape_util.h" + +namespace xla { + +class HloModule; + +// This class specifies the alias map from output index to parameter number and +// parameter index in the entry computation. 
+class HloInputOutputAliasConfig { + public: + HloInputOutputAliasConfig() = default; + + explicit HloInputOutputAliasConfig(Shape shape) : alias_(shape) {} + + virtual ~HloInputOutputAliasConfig() = default; + + // Sets up alias config from `output_index` to `param_index` at + // `param_number`. + Status SetUpAlias(const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index); + + // Returns true if the given parameter is aliased with one of the output + // buffers. + bool ParameterHasAlias(int64 param_number, + const ShapeIndex& param_index) const; + + // (De)Serializes an HloInputOutoutAliasConfig to/from an + // HloInputOutoutAliasProto. + HloInputOutputAliasProto ToProto() const; + + static StatusOr CreateFromProto( + const Shape& output_shape, const HloInputOutputAliasProto& proto); + + // Returns the output index that the given parameter and parameter index is + // aliased with. A nullopt is returned if there is no output that is aliased + // with the parameter number and index. + absl::optional GetAliasedOutput( + int64 param_number, const ShapeIndex& param_index) const; + + // Returns the number of parameter and index of the parameter buffer that the + // given output buffer index is aliased with. A nullopt is returned if there + // is no parameter is aliased with the specific output. + absl::optional> GetAliasedParameter( + const ShapeIndex& output_index) const; + + using AliasFn = + std::function; + + // Iterates through each aliased output and input. + void ForEachAlias(AliasFn fn) const; + + using AliasFnWithStatus = + std::function; + + // Verifies that the given config is valid for the given module. + // Specifically, the config's input and output should be in-bound and size of + // the aliased buffers should match. 
+ Status Verify(const HloModule& module) const; + + Status ForEachAliasWithStatus(AliasFnWithStatus fn) const; + + string ToString() const; + + private: + // A ShapeTree which indicates the list of buffers that's expected to be + // aliased. The key on this shape tree represents the output index. The value + // is a pair of parameter number and index into the buffer. If the value is + // nullopt, it means there is no parameter aliasing for this output. + ShapeTree>> alias_; +}; + +std::ostream& operator<<(std::ostream& out, + const HloInputOutputAliasConfig& config); + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_INPUT_OUTPUT_ALIAS_CONFIG_H_ diff --git a/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..3b61ff04e6d7eeaa5876775fa18a85af82164b3d --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_input_output_alias_config_test.cc @@ -0,0 +1,184 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" + +#include +#include + +#include "absl/algorithm/container.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_dce.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_memory_scheduler.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_ordering.h" +#include "tensorflow/compiler/xla/service/hlo_parser.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace xla { +namespace { +class HloInputOutputAliasConfigTest : public HloTestBase { + protected: + // Expects that `config` aliases `output_index` with (`param_number`, + // `param_index`), checked in both lookup directions. + void expect_aliased(const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index, + const HloInputOutputAliasConfig& config) { + absl::optional<ShapeIndex> aliased_output = + config.GetAliasedOutput(param_number, param_index); + + // ASSERT (not EXPECT) so an empty optional is never dereferenced below. + ASSERT_TRUE(aliased_output); + EXPECT_EQ(aliased_output.value(), output_index); + + absl::optional<std::pair<int64, ShapeIndex>> aliased_param = + config.GetAliasedParameter(output_index); + + ASSERT_TRUE(aliased_param); + EXPECT_EQ(aliased_param.value(), std::make_pair(param_number, param_index)); + } + + // Expects that `config` does not alias `output_index` with (`param_number`, + // `param_index`) in either lookup direction. + void expect_not_aliased(const ShapeIndex& output_index, int64 param_number, + const ShapeIndex& param_index, + const HloInputOutputAliasConfig& config) { + absl::optional<ShapeIndex> aliased_output = + config.GetAliasedOutput(param_number, param_index); + + EXPECT_FALSE(aliased_output && aliased_output == output_index); + + absl::optional<std::pair<int64, ShapeIndex>> aliased_param = + config.GetAliasedParameter(output_index); + + EXPECT_FALSE(aliased_param && aliased_param->first == param_number && +
aliased_param->second == param_index); + } +}; + +TEST_F(HloInputOutputAliasConfigTest, SimpleAliasing) { + const string module_str = R"( +HloModule TEST + +ENTRY main { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT root = (f32[], f32[]) tuple(%a, %b) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module, + ParseHloString(module_str)); + + HloInputOutputAliasConfig config( + module->entry_computation()->root_instruction()->shape()); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1, + /*param_index=*/{})); + + expect_aliased(/*output_index=*/{0}, /*param_number=*/1, + /*param_index=*/{}, config); + + expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1, + /*param_index=*/{}, config); + + expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{}, config); +} + +TEST_F(HloInputOutputAliasConfigTest, SimpleAliasingWithTupleInput) { + const string module_str = R"( +HloModule TEST + +ENTRY main { + param = (f32[], f32[]) parameter(0) + gte1 = f32[] get-tuple-element(%param), index=0 + gte2 = f32[] get-tuple-element(%param), index=1 + ROOT root = (f32[], f32[]) tuple(%gte1, %gte2) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module, + ParseHloString(module_str)); + + HloInputOutputAliasConfig config( + module->entry_computation()->root_instruction()->shape()); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{0})); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0, + /*param_index=*/{1})); + + expect_aliased(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{0}, config); + + expect_aliased(/*output_index=*/{1}, /*param_number=*/0, + /*param_index=*/{1}, config); + + expect_not_aliased(/*output_index=*/{1}, /*param_number=*/1, + /*param_index=*/{}, config); + + expect_not_aliased(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{}, config); +} + +TEST_F(HloInputOutputAliasConfigTest, InputDoNotAliasTwice)
{ + const string module_str = R"( +HloModule TEST + +ENTRY main { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT root = (f32[], f32[]) tuple(%a, %b) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseHloString(module_str)); + + HloInputOutputAliasConfig config( + module->entry_computation()->root_instruction()->shape()); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{})); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{1}, /*param_number=*/0, + /*param_index=*/{})); + + ASSERT_IS_NOT_OK(config.Verify(*module)); +} + +TEST_F(HloInputOutputAliasConfigTest, OutputDoNotAliasTwice) { + const string module_str = R"( +HloModule TEST + +ENTRY main { + a = f32[] parameter(0) + b = f32[] parameter(1) + ROOT root = (f32[], f32[]) tuple(%a, %b) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseHloString(module_str)); + + HloInputOutputAliasConfig config( + module->entry_computation()->root_instruction()->shape()); + + TF_ASSERT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/0, + /*param_index=*/{})); + + ASSERT_IS_NOT_OK(config.SetUpAlias(/*output_index=*/{0}, /*param_number=*/1, + /*param_index=*/{})); +} +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 2f6db7cd7c0ada166dc81f75c4a9989eb9d70638..b6df63c983d7297cb26b9cf528f41fa54a343cd7 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -195,17 +195,16 @@ StatusOr> HloInstruction::CreateFromProto( } break; case HloOpcode::kSort: { - TF_RET_CHECK(proto.operand_ids_size() == 1 || - proto.operand_ids_size() == 2) - << "Sort instruction should have 1 or 2 operands but has " + TF_RET_CHECK(proto.operand_ids_size() >= 1) + << "Sort instruction should have at least 1 operand but has " << proto.operand_ids_size(); TF_RET_CHECK(proto.dimensions().size() 
== 1) << "Sort instruction should have 1 dimension"; - HloInstruction* keys = operands(0); - HloInstruction* values = - proto.operand_ids_size() == 2 ? operands(1) : nullptr; - instruction = - CreateSort(proto.shape(), proto.dimensions(0), keys, values); + auto sort_operands = all_operands(); + HloInstruction* keys = sort_operands[0]; + instruction = CreateSort( + proto.shape(), proto.dimensions(0), keys, + absl::Span(sort_operands).subspan(1)); break; } case HloOpcode::kTranspose: @@ -305,6 +304,9 @@ StatusOr> HloInstruction::CreateFromProto( proto.tuple_index()); break; case HloOpcode::kReducePrecision: + TF_RET_CHECK(proto.operand_ids_size() == 1) + << "ReducePrecision instruction should have 1 operand but sees " + << proto.operand_ids_size(); instruction = CreateReducePrecision(proto.shape(), operands(0), proto.exponent_bits(), proto.mantissa_bits()); @@ -312,12 +314,16 @@ StatusOr> HloInstruction::CreateFromProto( case HloOpcode::kInfeed: { const Shape& data_shape = ShapeUtil::GetTupleElementShape(proto.shape(), 0); - TF_RET_CHECK(proto.operand_ids_size() == 1); + TF_RET_CHECK(proto.operand_ids_size() == 1) + << "Infeed instruction should have 1 operand but sees " + << proto.operand_ids_size(); instruction = CreateInfeed(data_shape, operands(0), proto.infeed_config()); } break; case HloOpcode::kOutfeed: - TF_RET_CHECK(proto.operand_ids_size() == 2); + TF_RET_CHECK(proto.operand_ids_size() == 2) + << "Outfeed instruction should have 2 operands but sees " + << proto.operand_ids_size(); TF_RETURN_IF_ERROR( ShapeUtil::ValidateShapeWithOptionalLayout(proto.outfeed_shape())); instruction = CreateOutfeed(proto.outfeed_shape(), operands(0), @@ -349,6 +355,9 @@ StatusOr> HloInstruction::CreateFromProto( break; } case HloOpcode::kCollectivePermute: { + TF_RET_CHECK(proto.operand_ids_size() == 1) + << "CollectivePermute instruction should have 1 operand but sees " + << proto.operand_ids_size(); std::vector> source_target_pairs( proto.source_target_pairs_size()); for (int 
i = 0; i < source_target_pairs.size(); i++) { @@ -396,9 +405,22 @@ StatusOr> HloInstruction::CreateFromProto( operands(1), operands(2), computations(1)); break; case HloOpcode::kCustomCall: - instruction = CreateCustomCall(proto.shape(), all_operands(), - proto.custom_call_target(), - proto.custom_call_opaque()); + if (proto.constrain_layout()) { + // A proto RepeatedPtrField cannot be converted to a Span (it is a + // vector of pointers essentially) so create a vector of shapes to pass + // in. + std::vector operand_shapes; + for (const Shape& shape : proto.operand_shapes_with_layout()) { + operand_shapes.push_back(shape); + } + instruction = CreateCustomCall( + proto.shape(), all_operands(), proto.custom_call_target(), + operand_shapes, proto.custom_call_opaque()); + } else { + instruction = CreateCustomCall(proto.shape(), all_operands(), + proto.custom_call_target(), + proto.custom_call_opaque()); + } if (proto.has_window()) { static_cast(instruction.get()) ->set_window(proto.window()); @@ -1055,7 +1077,7 @@ HloInstruction::CreateBroadcastSequence( /* static */ std::unique_ptr HloInstruction::CreateSort( const Shape& shape, int64 dimension, HloInstruction* keys, - HloInstruction* values) { + absl::Span values) { return absl::make_unique(shape, dimension, keys, values); } @@ -1142,6 +1164,15 @@ bool HloInstruction::HasSideEffect() const { shape, operands, custom_call_target, opaque); } +/* static */ std::unique_ptr HloInstruction::CreateCustomCall( + const Shape& shape, absl::Span operands, + absl::string_view custom_call_target, + absl::Span operand_shapes_with_layout, + absl::string_view opaque) { + return absl::make_unique( + shape, operands, custom_call_target, opaque, operand_shapes_with_layout); +} + /* static */ std::unique_ptr HloInstruction::CreateTuple( absl::Span elements) { std::vector element_shapes; @@ -2646,7 +2677,6 @@ Status HloInstruction::AcceptOrdered( } const Shape& HloInstruction::shape() const { - 
TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(shape_)); return shape_; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 374862c4b672bf4cb7c6e3dbc60392a1018520b7..15a4da8dbe0053aad314989a6718ebd61532ab8b 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -28,11 +28,10 @@ limitations under the License. #include #include #include -#include -#include #include #include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" #include "absl/container/inlined_vector.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" @@ -464,7 +463,7 @@ class HloInstruction { // the same all_reduce_id, they will be 'Allreduce'd. If empty, Allreduce will // not be applied cross modules. // - // TODO(b/79737069): Rename this to AllReduce. + // TODO(b/117564385): Rename this to AllReduce. static std::unique_ptr CreateCrossReplicaSum( const Shape& shape, absl::Span operands, HloComputation* reduce_computation, @@ -670,10 +669,10 @@ class HloInstruction { const Shape& shape, HloInstruction* operand, absl::Span dimensions); - // Creates a sort op, with a keys operand, and an optional values operand. + // Creates a sort op, with a keys operand, and optional values operands. static std::unique_ptr CreateSort( const Shape& shape, int64 dimension, HloInstruction* keys, - HloInstruction* values = nullptr); + absl::Span values = {}); // Creates a while instruction, given a condition computation, a body // computation, and the initial value for the input of the computations. For @@ -734,6 +733,16 @@ class HloInstruction { const Shape& shape, absl::Span operands, absl::string_view custom_call_target, absl::string_view opaque = ""); + // Overload which constrains the layouts of the operand and result. 'shape' + // and 'operand_shapes_with_layout' must have layouts. 
+ // 'operand_shapes_with_layout' must have a compatible element for each + // operand. + static std::unique_ptr CreateCustomCall( + const Shape& shape, absl::Span operands, + absl::string_view custom_call_target, + absl::Span operand_shapes_with_layout, + absl::string_view opaque = ""); + // Creates a tuple instruction with the given elements. This is a convenience // wrapper around CreateVariadic. static std::unique_ptr CreateTuple( @@ -1635,7 +1644,7 @@ class HloInstruction { // members. The set enables fast membership testing and the vector enables // fast, stable iteration. std::vector users_; - std::unordered_set user_set_; + absl::flat_hash_set user_set_; // The set of control successors of this instruction. std::vector control_successors_; diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index c1b7c3832b44b5d65b715dffa5211a5c92e17953..d93351fe0435b5f29035dc4ea0621a8c576bfd5a 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -135,7 +135,8 @@ TEST_F(HloInstructionTest, BasicProperties) { auto parameter = HloInstruction::CreateParameter(1, r0f32_, "foo"); EXPECT_EQ(HloOpcode::kParameter, parameter->opcode()); - EXPECT_TRUE(ShapeUtil::IsScalarF32(parameter->shape())); + EXPECT_TRUE(ShapeUtil::IsScalarWithElementType(parameter->shape(), F32)); + EXPECT_FALSE(ShapeUtil::IsScalarWithElementType(parameter->shape(), S32)); EXPECT_EQ(0, parameter->operand_count()); } diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index 152d8eacdb591a31afcbbf7f9f01d51864c929f0..179ace2cdb76051fecdeb7e0cbdcd808bf9fee25 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -600,11 +600,11 @@ std::unique_ptr HloReduceInstruction::CloneWithNewOperandsImpl( 
HloSortInstruction::HloSortInstruction(const Shape& shape, int64 dimension, HloInstruction* keys, - HloInstruction* values) + absl::Span values) : HloInstruction(HloOpcode::kSort, shape), dimensions_({dimension}) { AppendOperand(keys); - if (values) { - AppendOperand(values); + for (auto* value : values) { + AppendOperand(value); } } @@ -633,9 +633,8 @@ std::unique_ptr HloSortInstruction::CloneWithNewOperandsImpl( const Shape& shape, absl::Span new_operands, HloCloneContext* context) const { HloInstruction* keys = new_operands[0]; - HloInstruction* values = new_operands.size() == 2 ? new_operands[1] : nullptr; return absl::make_unique(shape, dimensions(0), keys, - values); + new_operands.subspan(1)); } HloTransposeInstruction::HloTransposeInstruction( @@ -1825,7 +1824,24 @@ HloCustomCallInstruction::HloCustomCallInstruction( : HloInstruction(HloOpcode::kCustomCall, shape), custom_call_target_(custom_call_target.begin(), custom_call_target.end()), opaque_(opaque.begin(), opaque.end()), - feature_group_count_(1) { + feature_group_count_(1), + layout_constrained_(false) { + for (auto operand : operands) { + AppendOperand(operand); + } +} + +HloCustomCallInstruction::HloCustomCallInstruction( + const Shape& shape, absl::Span operands, + absl::string_view custom_call_target, absl::string_view opaque, + absl::Span operand_shapes_with_layout) + : HloInstruction(HloOpcode::kCustomCall, shape), + custom_call_target_(custom_call_target.begin(), custom_call_target.end()), + opaque_(opaque.begin(), opaque.end()), + feature_group_count_(1), + layout_constrained_(true), + operand_shapes_with_layout_(operand_shapes_with_layout.begin(), + operand_shapes_with_layout.end()) { for (auto operand : operands) { AppendOperand(operand); } @@ -1843,6 +1859,12 @@ HloInstructionProto HloCustomCallInstruction::ToProto() const { proto.set_custom_call_target(custom_call_target_); proto.set_custom_call_opaque(opaque_); proto.set_feature_group_count(feature_group_count_); + if 
(layout_constrained()) { + proto.set_constrain_layout(true); + for (const Shape& shape : operand_shapes_with_layout_) { + *proto.add_operand_shapes_with_layout() = shape; + } + } return proto; } @@ -1870,6 +1892,14 @@ std::vector HloCustomCallInstruction::ExtraAttributesToStringImpl( if (!opaque_.empty()) { extra.push_back(StrCat("opaque=\"", CEscape(opaque_), "\"")); } + if (layout_constrained()) { + std::vector shape_strings; + for (const Shape& shape : operand_shapes_with_layout_) { + shape_strings.push_back(ShapeUtil::HumanStringWithLayout(shape)); + } + extra.push_back(StrCat("operand_layout_constraints={", + StrJoin(shape_strings, ", "), "}")); + } return extra; } diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index e169604072a6d23c5e601fcbe00b7a7bf37a933d..5f06dc093248e1d4d36ec845ced1e68c2b9d0752 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -418,14 +418,19 @@ class HloSortInstruction : public HloInstruction { public: explicit HloSortInstruction(const Shape& shape, int64 dimension, HloInstruction* keys, - HloInstruction* values = nullptr); + absl::Span values = {}); // Returns the dimension sizes or numbers associated with this instruction. const std::vector& dimensions() const override { return dimensions_; } int64 dimensions(int64 index) const override { return dimensions()[index]; } // Returns the sort dimension for this instruction - int64 sort_dimension() { return dimensions(0); } + int64 sort_dimension() const { return dimensions(0); } // Returns a serialized representation of this instruction. HloInstructionProto ToProto() const override; + // Returns the key operand to this instruction. + const HloInstruction* keys() const { return operand(0); } + HloInstruction* mutable_keys() { return mutable_operand(0); } + // Returns the number of value operands. 
+ int64 values_count() const { return operand_count() - 1; } private: std::vector ExtraAttributesToStringImpl( @@ -1053,10 +1058,19 @@ class HloSelectAndScatterInstruction : public HloInstruction { class HloCustomCallInstruction : public HloInstruction { public: - explicit HloCustomCallInstruction(const Shape& shape, - absl::Span operands, - absl::string_view custom_call_target, - absl::string_view opaque); + HloCustomCallInstruction(const Shape& shape, + absl::Span operands, + absl::string_view custom_call_target, + absl::string_view opaque); + + // Constructor for a custom call with constrained layout. 'shape' and + // 'operands_with_layout' must all have layouts. + HloCustomCallInstruction(const Shape& shape, + absl::Span operands, + absl::string_view custom_call_target, + absl::string_view opaque, + absl::Span operand_shapes_with_layout); + const Window& window() const override { CHECK(window_ != nullptr); return *window_; @@ -1085,6 +1099,16 @@ class HloCustomCallInstruction : public HloInstruction { // Returns a serialized representation of this instruction. HloInstructionProto ToProto() const override; + // Returns whether the result and operand layouts are constrained. + bool layout_constrained() const { return layout_constrained_; } + + // Returns the shapes (with layout) of the operands. CHECKs if this custom + // call does not have constrained layouts. + const std::vector& operand_shapes_with_layout() const { + CHECK(layout_constrained()); + return operand_shapes_with_layout_; + } + private: std::vector ExtraAttributesToStringImpl( const HloPrintOptions& options) const override; @@ -1106,6 +1130,11 @@ class HloCustomCallInstruction : public HloInstruction { std::unique_ptr convolution_dimension_numbers_; // The number of feature groups. This is used for grouped convolutions. int64 feature_group_count_; + // Whether the result and operand layouts are constrained. 
+ bool layout_constrained_; + // For layout-constrained custom calls, this vector holds the shape with + // layout for each operand. + std::vector operand_shapes_with_layout_; }; class HloPadInstruction : public HloInstruction { diff --git a/tensorflow/compiler/xla/service/hlo_lexer.cc b/tensorflow/compiler/xla/service/hlo_lexer.cc index d9be841dd751651ba029998fd062fcaec3691945..971a9a20636c80820306d512af9e7ff4a14b79b5 100644 --- a/tensorflow/compiler/xla/service/hlo_lexer.cc +++ b/tensorflow/compiler/xla/service/hlo_lexer.cc @@ -204,7 +204,7 @@ TokKind HloLexer::LexIdentifier() { auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end()); // 'consumable' will be advanced iff its prefix matches the pattern. static LazyRE2 shape_pattern = { - R"(^(\w*\d*)\[([\d,]*)\](?:(dense|sparse)?{([\d,]+)})?)"}; + R"(^(\w*\d*)\[([\d,\s]*)\](?:(dense|sparse)?{([\d,\s]+)})?)"}; if (RE2::Consume(&consumable, *shape_pattern)) { auto status_or_shape = ShapeUtil::ParseShapeString( StringPieceFromPointers(token_start_, consumable.begin())); diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h index 5502e565b6dfbaca6cfa2101950fb0a68c89771f..1717770301e3666b0a1c23d20b7f2e3bac5f62e4 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers.h +++ b/tensorflow/compiler/xla/service/hlo_matchers.h @@ -179,6 +179,7 @@ HLO_MATCHER(Convolution); HLO_MATCHER(Copy); HLO_MATCHER(CrossReplicaSum); HLO_MATCHER(Divide); +HLO_MATCHER(Domain); HLO_MATCHER(DynamicSlice); HLO_MATCHER(DynamicUpdateSlice); HLO_MATCHER(Eq); @@ -216,6 +217,7 @@ HLO_MATCHER(Remainder); HLO_MATCHER(Reshape); HLO_MATCHER(Reverse); HLO_MATCHER(Rng); +HLO_MATCHER(Scatter); HLO_MATCHER(Select); HLO_MATCHER(SelectAndScatter); HLO_MATCHER(Send); diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index 93e04eb3db47ba3dadfbd412733997b92c07da92..6845c27a91845ef971dc2d82266200bfccb25533 100644 --- 
a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -73,6 +73,8 @@ HloComputation* HloModule::AddComputationInternal( config_.SetDefaultComputationLayout( entry_computation_->ComputeProgramShape()); } + input_output_alias_config_ = HloInputOutputAliasConfig( + entry_computation_->root_instruction()->shape()); } if (uniquify_identifiers) { @@ -244,14 +246,14 @@ HloModuleProto HloModule::ToProto() const { proto.set_entry_computation_id(entry_computation_->unique_id()); for (const HloComputation* computation : MakeComputationPostOrder()) { HloComputationProto computation_proto = computation->ToProto(); - if (computation->name() == entry_computation_->name()) { - *proto.mutable_program_shape() = computation_proto.program_shape(); - } proto.add_computations()->Swap(&computation_proto); } if (has_schedule()) { *proto.mutable_schedule() = schedule().ToProto().ValueOrDie(); } + *proto.mutable_host_program_shape() = + entry_computation_layout().ComputeProgramShape(); + *proto.mutable_input_output_alias() = input_output_alias_config().ToProto(); return proto; } @@ -263,9 +265,9 @@ StatusOr> HloModule::CreateFromProto( // The ProgramShape in the passed in module config must match the shapes of // the entry parameters and root. 
- TF_RET_CHECK(proto.has_program_shape()) + TF_RET_CHECK(proto.has_host_program_shape()) << "No program shape found in the proto"; - const auto& expected_program_shape = proto.program_shape(); + const auto& expected_program_shape = proto.host_program_shape(); TF_RET_CHECK(expected_program_shape.parameters_size() == module_config.entry_computation_layout().parameter_count()); for (int i = 0; i < expected_program_shape.parameters_size(); ++i) { @@ -328,6 +330,10 @@ StatusOr> HloModule::CreateFromProto( } TF_RET_CHECK(module->entry_computation_ != nullptr); + TF_ASSIGN_OR_RETURN(module->input_output_alias_config_, + HloInputOutputAliasConfig::CreateFromProto( + result_shape, proto.input_output_alias())); + // Because we didn't uniquify the names or the ids, double-check that the // instruction and computation names and ids are unique from the proto. absl::flat_hash_set computation_names; @@ -366,9 +372,9 @@ StatusOr> HloModule::CreateFromProto( /* static */ StatusOr HloModule::CreateModuleConfigFromProto( const HloModuleProto& module, const DebugOptions& debug_options) { - TF_RET_CHECK(module.has_program_shape()) + TF_RET_CHECK(module.has_host_program_shape()) << "No program shape found in the proto"; - const auto& program_shape = module.program_shape(); + const auto& program_shape = module.host_program_shape(); HloModuleConfig module_config(program_shape); module_config.set_debug_options(debug_options); @@ -558,8 +564,13 @@ std::vector HloModule::MakeNonfusionComputations() const { } std::unique_ptr HloModule::Clone(const string& suffix) const { + return Clone(config(), suffix); +} + +std::unique_ptr HloModule::Clone(const HloModuleConfig& config, + const string& suffix) const { VLOG(1) << "Cloning module :" << name_ << " --> " << suffix << "\n"; - auto module = absl::make_unique(name_ + "-" + suffix, config_); + auto module = absl::make_unique(name_ + "-" + suffix, config); HloCloneContext context(module.get(), suffix); auto cloned_computation = 
entry_computation_->Clone(suffix, &context); diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index 735804e827afd77e2b7f2a4a7d490ee6f5ee7b4f..5dc795fabec5d8d794635ef6965c4d065b0b75a6 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo.pb.h" #include "tensorflow/compiler/xla/service/hlo_clone_context.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_input_output_alias_config.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module_config.h" #include "tensorflow/compiler/xla/service/hlo_schedule.h" @@ -92,6 +93,8 @@ class HloModule { // Returns a deep copy of this module including all computations. std::unique_ptr Clone(const string& suffix = "clone") const; + std::unique_ptr Clone(const HloModuleConfig& config, + const string& suffix = "clone") const; // Performs a deep clone of the computation, by recursively cloning all // the called computations as well. If the clone context is specified, it @@ -99,7 +102,7 @@ class HloModule { HloComputation* DeepCloneComputation(HloComputation* computation, HloCloneContext* context = nullptr); - // Return a pointer to the entry computation of the module.. + // Return a pointer to the entry computation of the module. const HloComputation* entry_computation() const { CHECK_NE(nullptr, entry_computation_); return entry_computation_; @@ -109,6 +112,14 @@ class HloModule { return entry_computation_; } + // Returns the root instruction shape of entry computation. + // + // Precondition: entry_computation_ is not nullptr. 
+ const Shape& result_shape() const { + CHECK_NE(nullptr, entry_computation_); + return entry_computation()->root_instruction()->shape(); + } + // Creates the ComputationLayout which describes the current status of the HLO // module entry computation. ComputationLayout compute_computation_layout() const { @@ -212,9 +223,14 @@ class HloModule { return result; } - // Returns the number of unique intruction ids given out. All ids up to - // this point are guaranteed to be in the range [0..NumUniqueInstructionIds()) - int NumUniqueInstructionIds() const { return next_unique_id_; } + // input_output_alias_config indicates the list of aliased buffers that are + // expected from the module. + HloInputOutputAliasConfig& input_output_alias_config() { + return input_output_alias_config_; + } + const HloInputOutputAliasConfig& input_output_alias_config() const { + return input_output_alias_config_; + } // Returns an id that is unique to this module across all modules created over // the lifetime of this process. @@ -284,6 +300,10 @@ class HloModule { // sequential order of instructions for each non-fusion computation in the // module. absl::optional schedule_; + + // alias_config indicates the alias information of input/output buffers that + // are expected from the module. + HloInputOutputAliasConfig input_output_alias_config_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_module_group.cc b/tensorflow/compiler/xla/service/hlo_module_group.cc index f9b56ef4643f2ca88e56456ae6c990161adb5085..8999ac9f324ed24cf34ef6826000e1fa4f741e19 100644 --- a/tensorflow/compiler/xla/service/hlo_module_group.cc +++ b/tensorflow/compiler/xla/service/hlo_module_group.cc @@ -17,9 +17,8 @@ limitations under the License. 
namespace xla { -HloModuleGroup::HloModuleGroup(absl::string_view name, - std::unique_ptr module) - : name_(name) { +HloModuleGroup::HloModuleGroup(std::unique_ptr module) + : name_(module->name()) { push_back(std::move(module)); } diff --git a/tensorflow/compiler/xla/service/hlo_module_group.h b/tensorflow/compiler/xla/service/hlo_module_group.h index 7338be8b9c5ed47f0ba5829cc1d603b21f00b6e0..7c39cf17815aa08742e6d5b35941d8043531d034 100644 --- a/tensorflow/compiler/xla/service/hlo_module_group.h +++ b/tensorflow/compiler/xla/service/hlo_module_group.h @@ -35,7 +35,7 @@ class HloModuleGroup { explicit HloModuleGroup(absl::string_view name) : name_(name) {} // Construct a module group containing a single module. - HloModuleGroup(absl::string_view name, std::unique_ptr module); + explicit HloModuleGroup(std::unique_ptr module); // Construct a module group containing any number of modules. HloModuleGroup(absl::string_view name, @@ -50,11 +50,16 @@ class HloModuleGroup { // Add a module to the back of vector of modules in the group. void push_back(std::unique_ptr module); + // Replaces the existing module at the given index with the given module. The + // existing module is discarded. + void ReplaceModule(int index, std::unique_ptr module); + // Moves all modules from the group into the returned vector. After this // method runs, the module group will be empty. std::vector> ConsumeModules(); string name() const { return name_; } + string ToString() const; // Serialize the module group to/from a proto. @@ -63,6 +68,12 @@ class HloModuleGroup { const HloModuleGroupProto& proto, absl::Span module_configs); + // Returns the number of modules in the module group. + int size() const { return modules_.size(); } + + // Returns true if there are no modules in the module group. 
+ bool empty() const { return modules_.empty(); } + private: string name_; diff --git a/tensorflow/compiler/xla/service/hlo_module_group_test.cc b/tensorflow/compiler/xla/service/hlo_module_group_test.cc index b7b12cb72b8df4610b964fb842da78e160d22d9f..5a9a86af5649bf240bb5de6d30fc80b0f6a58eba 100644 --- a/tensorflow/compiler/xla/service/hlo_module_group_test.cc +++ b/tensorflow/compiler/xla/service/hlo_module_group_test.cc @@ -46,7 +46,7 @@ ENTRY %entry (x: f32[], y: f32[]) -> f32[] { )"; TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, ParseHloString(text)); - HloModuleGroup group(TestName(), std::move(module)); + HloModuleGroup group(std::move(module)); EXPECT_EQ(group.modules().size(), 1); EXPECT_THAT( diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index dd62988bccf7a0b2daa0bd39fc642452c768fceb..81f091238e5725f64b953f70b82d52cc90aef8ea 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -174,6 +174,7 @@ class HloParser { kDistribution, kDomain, kPrecisionList, + kShapeList }; struct AttrConfig { @@ -240,6 +241,7 @@ class HloParser { bool ParseSliceRanges(SliceRanges* result); bool ParsePrecisionList(std::vector* result); + bool ParseShapeList(std::vector* result); bool ParseInt64List(const TokKind start, const TokKind end, const TokKind delim, std::vector* result); @@ -837,8 +839,6 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder, break; } case HloOpcode::kSort: { - auto loc = lexer_.GetLoc(); - optional> dimensions; attrs["dimensions"] = {/*required=*/true, AttrTy::kBracedInt64List, &dimensions}; @@ -846,20 +846,10 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder, dimensions->size() != 1) { return false; } - switch (operands.size()) { - case 1: - instruction = builder->AddInstruction(HloInstruction::CreateSort( - shape, dimensions->at(0), /*keys=*/operands[0])); - break; - case 2: - instruction = 
builder->AddInstruction(HloInstruction::CreateSort( - shape, dimensions->at(0), - /*keys=*/operands[0], /*values=*/operands[1])); - break; - default: - return Error(loc, StrCat("expects either 1 or 2 operands, but has ", - operands.size(), " operands")); - } + instruction = builder->AddInstruction(HloInstruction::CreateSort( + shape, dimensions->at(0), + /*keys=*/operands[0], + /*values=*/absl::Span(operands).subspan(1))); break; } case HloOpcode::kTuple: { @@ -1341,6 +1331,7 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder, optional window; optional dnums; optional feature_group_count; + optional> operand_layout_constraints; attrs["custom_call_target"] = {/*required=*/true, AttrTy::kString, &custom_call_target}; attrs["opaque"] = {/*required=*/false, AttrTy::kString, &opaque}; @@ -1349,12 +1340,52 @@ bool HloParser::ParseInstruciontRhs(HloComputation::Builder* builder, AttrTy::kConvolutionDimensionNumbers, &dnums}; attrs["feature_group_count"] = {/*required=*/false, AttrTy::kInt64, &feature_group_count}; + attrs["operand_layout_constraints"] = { + /*required=*/false, AttrTy::kShapeList, &operand_layout_constraints}; if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { return false; } - instruction = builder->AddInstruction( - HloInstruction::CreateCustomCall(shape, operands, *custom_call_target, - opaque.has_value() ? 
*opaque : "")); + if (operand_layout_constraints.has_value()) { + if (!LayoutUtil::HasLayout(shape)) { + return Error(lexer_.GetLoc(), + "Layout must be set on layout-constrained custom call"); + } + if (operands.size() != operand_layout_constraints->size()) { + return Error(lexer_.GetLoc(), + StrCat("Expected ", operands.size(), + " operand layout constraints, ", + operand_layout_constraints->size(), " given")); + } + for (int64 i = 0; i < operands.size(); ++i) { + const Shape& operand_shape_with_layout = + (*operand_layout_constraints)[i]; + if (!LayoutUtil::HasLayout(operand_shape_with_layout)) { + return Error(lexer_.GetLoc(), + StrCat("Operand layout constraint shape ", + ShapeUtil::HumanStringWithLayout( + operand_shape_with_layout), + " for operand ", i, " does not have a layout")); + } + if (!ShapeUtil::Compatible(operand_shape_with_layout, + operands[i]->shape())) { + return Error( + lexer_.GetLoc(), + StrCat( + "Operand layout constraint shape ", + ShapeUtil::HumanStringWithLayout(operand_shape_with_layout), + " for operand ", i, + " is not compatible with operand shape ", + ShapeUtil::HumanStringWithLayout(operands[i]->shape()))); + } + } + instruction = builder->AddInstruction(HloInstruction::CreateCustomCall( + shape, operands, *custom_call_target, *operand_layout_constraints, + opaque.has_value() ? *opaque : "")); + } else { + instruction = builder->AddInstruction(HloInstruction::CreateCustomCall( + shape, operands, *custom_call_target, + opaque.has_value() ? 
*opaque : "")); + } if (window.has_value()) { instruction->set_window(*window); } @@ -2533,6 +2564,15 @@ bool HloParser::ParseAttributeHelper( ->emplace(result); return true; } + case AttrTy::kShapeList: { + std::vector result; + if (!ParseShapeList(&result)) { + return false; + } + static_cast>*>(attr_out_ptr) + ->emplace(result); + return true; + } } }(); if (!success) { @@ -2825,6 +2865,23 @@ bool HloParser::ParsePrecisionList( parse_and_add_item); } +// shapelist ::= '{' shapes '}' +// precision_elements +// ::= /*empty*/ +// ::= shape (',' shape)* +bool HloParser::ParseShapeList(std::vector* result) { + auto parse_and_add_item = [&]() { + Shape shape; + if (!ParseShape(&shape)) { + return false; + } + result->push_back(std::move(shape)); + return true; + }; + return ParseList(TokKind::kLbrace, TokKind::kRbrace, TokKind::kComma, + parse_and_add_item); +} + // int64list ::= start int64_elements end // int64_elements // ::= /*empty*/ @@ -2832,23 +2889,15 @@ bool HloParser::ParsePrecisionList( bool HloParser::ParseInt64List(const TokKind start, const TokKind end, const TokKind delim, std::vector* result) { - if (!ParseToken(start, StrCat("expects an int64 list starting with ", - TokKindToString(start)))) { - return false; - } - if (lexer_.GetKind() == end) { - // empty - } else { - do { - tensorflow::int64 i; - if (!ParseInt64(&i)) { - return false; - } - result->push_back(i); - } while (EatIfPresent(delim)); - } - return ParseToken( - end, StrCat("expects an int64 list to end with ", TokKindToString(end))); + auto parse_and_add_item = [&]() { + tensorflow::int64 i; + if (!ParseInt64(&i)) { + return false; + } + result->push_back(i); + return true; + }; + return ParseList(start, end, delim, parse_and_add_item); } bool HloParser::ParseList(const TokKind start, const TokKind end, @@ -2933,7 +2982,8 @@ bool HloParser::ParseShape(Shape* result) { } if (lexer_.GetKind() != TokKind::kShape) { - return TokenError("expects shape"); + return 
TokenError(absl::StrCat("expected shape, saw ", + TokKindToString(lexer_.GetKind()))); } *result = lexer_.GetShapeVal(); lexer_.Lex(); diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index 255123d331c91b1c862980b9248afe9a03d564c8..19f84d8bd28371518e44e38614b8a81fa920985f 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -802,6 +802,43 @@ ENTRY %ConstantUnsignedNoOverflow () -> u64[] { ROOT %constant = u64[] constant(9223372036854775807) } +)" +}, +// CustomCallWithLayoutConstraints +{ +"CustomCallWithLayoutConstraints", +R"(HloModule CustomCallWithLayoutConstraints + +ENTRY %CustomCallWithLayoutConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] { + %p0 = f32[42,2,3]{0,1,2} parameter(0) + %p1 = f32[123,4]{0,1} parameter(1) + ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}, f32[123,4]{1,0}} +} + +)" +}, +// CustomCallWithLayoutConstraintsNoOperands +{ +"CustomCallWithLayoutConstraintsNoOperands", +R"(HloModule CustomCallWithLayoutConstraintsNoOperands + +ENTRY %CustomCallWithLayoutConstraints () -> f32[1,2,3] { + ROOT %custom-call = f32[1,2,3]{0,2,1} custom-call(), custom_call_target="baz", operand_layout_constraints={} +} + +)" +}, +// CustomCallWithLayoutConstraintsTupleShapes +{ +"CustomCallWithLayoutConstraintsTupleShapes", +R"(HloModule CustomCallWithLayoutConstraintsTupleShapes + +ENTRY %CustomCallWithLayoutConstraints (p0: (f32[2,2], f32[42,2,3]), p1: f32[123,4]) -> (f32[1,2,3], f32[1,2,3]) { + %p0 = (f32[2,2]{0,1}, f32[42,2,3]{0,1,2}) parameter(0) + %p1 = f32[123,4]{0,1} parameter(1) + ROOT %custom-call = (f32[1,2,3]{0,2,1}, f32[1,2,3]{1,2,0}) custom-call((f32[2,2]{0,1}, f32[42,2,3]{0,1,2}) %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={(f32[2,2]{1,0}, 
f32[42,2,3]{2,0,1}), f32[123,4]{1,0}} +} + )" }, }); @@ -966,6 +1003,21 @@ ENTRY Sort { ROOT sorted = (f32[1024,16]{0,1}, s32[1024,16]{0,1}) sort(keys, values), dimensions={0} } +)" +}, +// Sort (Key, Value, Value, Value) +{ +"SortManyValues", +R"(HloModule sort + +ENTRY Sort { + keys = f32[1024,16]{0,1} parameter(0) + values.0 = s32[1024,16]{0,1} parameter(1) + values.1 = u32[1024,16]{0,1} parameter(2) + values.2 = f32[1024,16]{0,1} parameter(3) + ROOT sorted = (f32[1024,16]{0,1}, s32[1024,16]{0,1}, u32[1024,16]{0,1}, f32[1024,16]{0,1}) sort(keys, values.0, values.1, values.2), dimensions={0} +} + )" }, // Conditional @@ -2069,5 +2121,47 @@ ENTRY %axpy.v5 (alpha: f32[], x: f32[2,4], y: f32[2,4]) -> f32[2,4] { op::Broadcast(), op::Multiply(), op::Add())); } +TEST_F(HloParserTest, CustomCallWrongNumberofOperandConstraints) { + const string original = R"(HloModule CustomCallWrongNumberofOperandConstraints + +ENTRY %CustomCallWrongNumberofOperandConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] { + %p0 = f32[42,2,3]{0,1,2} parameter(0) + %p1 = f32[123,4]{0,1} parameter(1) + ROOT %custom-call = f32[1,2,3]{0,1,2} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}} +} + +)"; + ExpectHasSubstr(ParseHloString(original).status().error_message(), + "Expected 2 operand layout constraints, 1 given"); +} + +TEST_F(HloParserTest, CustomCallIncompatibleOperandConstraints) { + const string original = R"(HloModule CustomCallIncompatibleOperandConstraints + +ENTRY %CustomCallIncompatibleOperandConstraints (p0: f32[42,2,3], p1: f32[123,4]) -> f32[1,2,3] { + %p0 = f32[42,2,3]{0,1,2} parameter(0) + %p1 = f32[123,4]{0,1} parameter(1) + ROOT %custom-call = f32[1,2,3]{0,1,2} custom-call(f32[42,2,3]{0,1,2} %p0, f32[123,4]{0,1} %p1), custom_call_target="baz", operand_layout_constraints={f32[42,2,3]{0,1,2}, f32[555,5]{1,0}} +} + +)"; + ExpectHasSubstr(ParseHloString(original).status().error_message(), + 
"operand 1 is not compatible with operand shape"); +} + +TEST_F(HloParserTest, AllowShapeWhitespace) { + const string text = R"( +HloModule module + +ENTRY entry { + ROOT root = f32[ 1, 2,3, 4, 5]{0, 1, 2,3, 4 } parameter(0) +} +)"; + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + ParseHloString(text)); +} + +// custom call incompatible shape. + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.cc b/tensorflow/compiler/xla/service/hlo_proto_util.cc index b9c0b0c4ee1957fce48641230cef6391bcc9180e..cf33668f5bfa64a7843efc76e9f6768d18533240 100644 --- a/tensorflow/compiler/xla/service/hlo_proto_util.cc +++ b/tensorflow/compiler/xla/service/hlo_proto_util.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/compiler/xla/service/hlo_proto_util.h" +#include "tensorflow/compiler/xla/service/hlo_verifier.h" #include @@ -36,17 +37,28 @@ HloProto MakeHloProto(const HloModule& module) { return proto; } +StatusOr> CreateModuleFromProto( + const HloModuleProto& proto, const HloModuleConfig& module_config) { + TF_ASSIGN_OR_RETURN(std::unique_ptr module, + HloModule::CreateFromProto(proto, module_config)); + TF_RETURN_IF_ERROR( + HloVerifier(/*layout_sensitive=*/false, /*allow_mixed_precision=*/false) + .Run(module.get()) + .status()); + return std::move(module); +} + StatusOr> EntryComputationParameterShapes( const HloProto& hlo_proto) { if (!hlo_proto.has_hlo_module()) { return NotFound("HloProto missing HloModuleProto."); } - if (!hlo_proto.hlo_module().has_program_shape()) { + if (!hlo_proto.hlo_module().has_host_program_shape()) { return NotFound("HloProto missing program shape."); } std::vector parameter_shapes; - const auto& program_shape = hlo_proto.hlo_module().program_shape(); + const auto& program_shape = hlo_proto.hlo_module().host_program_shape(); for (const Shape& shape : program_shape.parameters()) { 
parameter_shapes.push_back(&shape); } @@ -57,14 +69,14 @@ StatusOr EntryComputationOutputShape(const HloProto& hlo_proto) { if (!hlo_proto.has_hlo_module()) { return NotFound("HloProto missing HloModuleProto."); } - if (!hlo_proto.hlo_module().has_program_shape()) { + if (!hlo_proto.hlo_module().has_host_program_shape()) { return NotFound("HloProto missing program shape."); } - if (!hlo_proto.hlo_module().program_shape().has_result()) { + if (!hlo_proto.hlo_module().host_program_shape().has_result()) { return NotFound("HloProto missing result in its program shape"); } - return &hlo_proto.hlo_module().program_shape().result(); + return &hlo_proto.hlo_module().host_program_shape().result(); } } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.h b/tensorflow/compiler/xla/service/hlo_proto_util.h index 3d9c375cd5d26f92cf8316f78789daf4fc08c927..1db82dd6fcaa5d7fe7d65894c1021105f0b26266 100644 --- a/tensorflow/compiler/xla/service/hlo_proto_util.h +++ b/tensorflow/compiler/xla/service/hlo_proto_util.h @@ -35,6 +35,12 @@ HloProto MakeHloProto(const HloModule& module, // will not be included in the output. HloProto MakeHloProto(const HloModule& module); +// Create an HLO state from serialized representation. In addition to +// creating the proto with HloModule::CreateFromProto(...) it also +// uses HloVerifier to ensure basic invariants are held. +StatusOr> CreateModuleFromProto( + const HloModuleProto& proto, const HloModuleConfig& module_config); + // Returns the shapes of the parameters of the entry computation. Shape pointers // refer to shapes inside of the given HloProto. 
StatusOr> EntryComputationParameterShapes( diff --git a/tensorflow/compiler/xla/service/hlo_query.cc b/tensorflow/compiler/xla/service/hlo_query.cc index 2a07b6fcbc243d955e136ccdf097c8155a115845..2d5197be9e6f69f698729e06b7506a5bc6260bcd 100644 --- a/tensorflow/compiler/xla/service/hlo_query.cc +++ b/tensorflow/compiler/xla/service/hlo_query.cc @@ -24,7 +24,7 @@ namespace hlo_query { bool IsConstantR0F32(HloInstruction* instruction, float* out) { if (instruction->opcode() == HloOpcode::kConstant && - ShapeUtil::IsScalarF32(instruction->shape())) { + ShapeUtil::IsScalarWithElementType(instruction->shape(), F32)) { *out = instruction->literal().Get({}); return true; } diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index 5ac43808ee2945eaa5003baad24d5d331419db83..49e46ecd00ee4370f3e93746348373b79febed3d 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -1215,7 +1215,7 @@ StatusOr HloRematerialization::Run(HloModule* module) { // by the caller. 
int64 module_output_size = 0; ShapeUtil::ForEachSubshape( - module->entry_computation()->root_instruction()->shape(), + module->result_shape(), [&module_output_size, this](const Shape& subshape, const ShapeIndex& /*index*/) { module_output_size += size_function_(subshape); diff --git a/tensorflow/compiler/xla/service/hlo_sharding.cc b/tensorflow/compiler/xla/service/hlo_sharding.cc index 188f4acc7945f3ec98065eae5a87a41c39730432..70a860c356ca2fb1c4c973ea3d96c50fabc2c7c2 100644 --- a/tensorflow/compiler/xla/service/hlo_sharding.cc +++ b/tensorflow/compiler/xla/service/hlo_sharding.cc @@ -469,6 +469,9 @@ absl::optional HloSharding::ExtractSingleSharding() const { if (!IsTuple()) { return *this; } + if (tuple_elements_.empty()) { + return absl::nullopt; + } for (int64 i = 1; i < tuple_elements_.size(); ++i) { if (tuple_elements_[0] != tuple_elements_[i]) { return absl::nullopt; diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index fad3b14ec28ab7e16e7ac6806359fff766c790bb..ba95cef21da404646c3d347d3599209ce0a7f987 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -27,6 +27,23 @@ limitations under the License. 
namespace xla { +Status ShapeVerifier::Preprocess(HloInstruction* hlo) { + if (LayoutUtil::IsSparseArray(hlo->shape())) { + return InternalError("Sparse arrays are not yet fully supported: %s", + hlo->ToString()); + } + return Status::OK(); +} + +static Status CheckOperandCount(const HloInstruction* hlo, int expected) { + if (hlo->operand_count() != expected) { + return InternalError("Expected %d operands for %s instruction: %s", + expected, HloOpcodeString(hlo->opcode()), + hlo->ToString()); + } + return Status::OK(); +} + Status ShapeVerifier::HandleElementwiseUnary(HloInstruction* hlo) { return CheckUnaryShape(hlo); } @@ -58,12 +75,14 @@ Status ShapeVerifier::HandleConcatenate(HloInstruction* concatenate) { } Status ShapeVerifier::HandleConvert(HloInstruction* convert) { + TF_RETURN_IF_ERROR(CheckOperandCount(convert, 1)); return CheckShape(convert, ShapeInference::InferConvertShape( convert->operand(0)->shape(), convert->shape().element_type())); } Status ShapeVerifier::HandleBitcastConvert(HloInstruction* convert) { + TF_RETURN_IF_ERROR(CheckOperandCount(convert, 1)); return CheckShape(convert, ShapeInference::InferBitcastConvertShape( convert->operand(0)->shape(), convert->shape().element_type())); @@ -74,6 +93,7 @@ Status ShapeVerifier::HandleCopy(HloInstruction* copy) { } Status ShapeVerifier::HandleDot(HloInstruction* dot) { + TF_RETURN_IF_ERROR(CheckOperandCount(dot, 2)); TF_ASSIGN_OR_RETURN(const Shape expected, ShapeInference::InferDotOpShape( dot->operand(0)->shape(), dot->operand(1)->shape(), @@ -82,6 +102,7 @@ Status ShapeVerifier::HandleDot(HloInstruction* dot) { } Status ShapeVerifier::HandleConvolution(HloInstruction* convolution) { + TF_RETURN_IF_ERROR(CheckOperandCount(convolution, 2)); TF_ASSIGN_OR_RETURN( const Shape expected, ShapeInference::InferConvolveShape( @@ -92,6 +113,7 @@ Status ShapeVerifier::HandleConvolution(HloInstruction* convolution) { } Status ShapeVerifier::HandleFft(HloInstruction* fft) { + 
TF_RETURN_IF_ERROR(CheckOperandCount(fft, 1)); TF_ASSIGN_OR_RETURN( const Shape expected, ShapeInference::InferFftShape(fft->operand(0)->shape(), fft->fft_type(), @@ -118,11 +140,13 @@ Status ShapeVerifier::HandleAllToAll(HloInstruction* hlo) { } Status ShapeVerifier::HandleCollectivePermute(HloInstruction* hlo) { + TF_RETURN_IF_ERROR(CheckOperandCount(hlo, 1)); return CheckShape(hlo, ShapeInference::InferCollectivePermuteShape( hlo->operand(0)->shape())); } Status ShapeVerifier::HandleReducePrecision(HloInstruction* reduce_precision) { + TF_RETURN_IF_ERROR(CheckOperandCount(reduce_precision, 1)); return CheckShape(reduce_precision, ShapeInference::InferReducePrecisionShape( reduce_precision->operand(0)->shape(), reduce_precision->exponent_bits(), @@ -156,6 +180,7 @@ Status ShapeVerifier::CheckOperandAndParameter( } Status ShapeVerifier::HandleInfeed(HloInstruction* instruction) { + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 1)); HloInfeedInstruction* infeed = Cast(instruction); TF_RETURN_IF_ERROR(CheckIsTokenOperand(instruction, 0)); @@ -166,6 +191,7 @@ Status ShapeVerifier::HandleInfeed(HloInstruction* instruction) { } Status ShapeVerifier::HandleOutfeed(HloInstruction* instruction) { + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 2)); HloOutfeedInstruction* outfeed = Cast(instruction); TF_RETURN_IF_ERROR(CheckIsTokenOperand(instruction, 1)); @@ -192,10 +218,7 @@ bool ShapeVerifier::HasCompatibleElementTypes(const Shape& shape_0, } Status ShapeVerifier::HandleRng(HloInstruction* instruction) { - if (instruction->operand_count() != 2) { - return InternalError("Expected two operands for Rng instruction: %s", - instruction->ToString()); - } + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 2)); const Shape& shape_0 = instruction->operand(0)->shape(); const Shape& shape_1 = instruction->operand(1)->shape(); @@ -244,29 +267,42 @@ Status ShapeVerifier::HandleRng(HloInstruction* instruction) { } Status ShapeVerifier::HandleReverse(HloInstruction* reverse) 
{ + TF_RETURN_IF_ERROR(CheckOperandCount(reverse, 1)); return CheckShape( reverse, ShapeInference::InferReverseShape(reverse->operand(0)->shape(), reverse->dimensions())); } Status ShapeVerifier::HandleSort(HloInstruction* sort) { - if (sort->operand_count() == 2 && - !ShapeUtil::SameDimensions(sort->operand(0)->shape(), - sort->operand(1)->shape())) { - return InternalError( - "Expected sort to have to have the same dimensions for the keys and " - "the values. Keys shape is: %s\n, Values shape is: %s", - StringifyShape(sort->operand(0)->shape()), - StringifyShape(sort->operand(1)->shape())); + if (sort->operand_count() < 1) { + return InternalError("Expected at least 1 operand for %s instruction: %s", + HloOpcodeString(sort->opcode()), sort->ToString()); + } + for (int64 operand = 1; operand < sort->operand_count(); ++operand) { + if (!ShapeUtil::SameDimensions(sort->operand(0)->shape(), + sort->operand(operand)->shape())) { + return InternalError( + "Expected sort to have to have the same dimensions for the keys " + "and the values. 
Keys shape is: %s\n, Values shape (operand index " + "%lld) is: %s", + StringifyShape(sort->operand(0)->shape()), operand, + StringifyShape(sort->operand(operand)->shape())); + } } return CheckVariadicShape(sort); } Status ShapeVerifier::HandleConstant(HloInstruction* constant) { + TF_RETURN_IF_ERROR(CheckOperandCount(constant, 0)); + if (!Cast(constant)->HasLiteral()) { + return InternalError("Constant is required to have a valid literal: %s", + constant->ToString()); + } return CheckShape(constant, constant->literal().shape()); } Status ShapeVerifier::HandleIota(HloInstruction* instruction) { + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 0)); auto* iota = Cast(instruction); const int64 rank = ShapeUtil::Rank(iota->shape()); if (rank == 0) { @@ -281,6 +317,7 @@ Status ShapeVerifier::HandleIota(HloInstruction* instruction) { } Status ShapeVerifier::HandleGetTupleElement(HloInstruction* get_tuple_element) { + TF_RETURN_IF_ERROR(CheckOperandCount(get_tuple_element, 1)); return CheckShape(get_tuple_element, ShapeInference::InferGetTupleElementShape( get_tuple_element->operand(0)->shape(), @@ -288,6 +325,12 @@ Status ShapeVerifier::HandleGetTupleElement(HloInstruction* get_tuple_element) { } Status ShapeVerifier::HandleReduce(HloInstruction* reduce) { + if (reduce->operand_count() % 2 != 0) { + return InternalError( + "Expected an even number of operands for %s instruction: %s", + HloOpcodeString(reduce->opcode()), reduce->ToString()); + } + std::vector operand_shapes; for (const HloInstruction* operand : reduce->operands()) { operand_shapes.push_back(&operand->shape()); @@ -298,10 +341,12 @@ Status ShapeVerifier::HandleReduce(HloInstruction* reduce) { } Status ShapeVerifier::HandleBitcast(HloInstruction* bitcast) { + TF_RETURN_IF_ERROR(CheckOperandCount(bitcast, 1)); return Status::OK(); } Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) { + TF_RETURN_IF_ERROR(CheckOperandCount(broadcast, 1)); // HLO broadcast has no exact analog at the proto 
level so there is no // ShapeInference method. Check the output shape explicitly. const Shape& operand_shape = broadcast->operand(0)->shape(); @@ -313,14 +358,16 @@ Status ShapeVerifier::HandleBroadcast(HloInstruction* broadcast) { operand_dimension < ShapeUtil::Rank(operand_shape); ++operand_dimension) { int64 output_dimension = broadcast->dimensions()[operand_dimension]; - TF_RET_CHECK(broadcast->shape().dimensions(output_dimension) == - operand_shape.dimensions(operand_dimension)) + TF_RET_CHECK((output_dimension < ShapeUtil::Rank(broadcast->shape())) && + (broadcast->shape().dimensions(output_dimension) == + operand_shape.dimensions(operand_dimension))) << broadcast->ToString() << " operand shape " << operand_shape; } return Status::OK(); } Status ShapeVerifier::HandleReshape(HloInstruction* reshape) { + TF_RETURN_IF_ERROR(CheckOperandCount(reshape, 1)); // Check for mixed precision. TF_RETURN_IF_ERROR(CheckShape(reshape, reshape->shape())); TF_RET_CHECK(ShapeUtil::ElementsIn(reshape->shape()) == @@ -329,12 +376,14 @@ Status ShapeVerifier::HandleReshape(HloInstruction* reshape) { } Status ShapeVerifier::HandleTranspose(HloInstruction* transpose) { + TF_RETURN_IF_ERROR(CheckOperandCount(transpose, 1)); return CheckShape( transpose, ShapeInference::InferTransposeShape( transpose->operand(0)->shape(), transpose->dimensions())); } Status ShapeVerifier::HandleParameter(HloInstruction* hlo) { + TF_RETURN_IF_ERROR(CheckOperandCount(hlo, 0)); return Status::OK(); } @@ -359,9 +408,30 @@ Status ShapeVerifier::HandleCall(HloInstruction* call) { return CheckShape(call, call->to_apply()->root_instruction()->shape()); } -Status ShapeVerifier::HandleCustomCall(HloInstruction*) { return Status::OK(); } +Status ShapeVerifier::HandleCustomCall(HloInstruction* instruction) { + const HloCustomCallInstruction* custom_call = + DynCast(instruction); + TF_RET_CHECK(custom_call != nullptr); + if (custom_call->layout_constrained()) { + // If the layout is constrained, verify all the 
respective shapes have + // layouts and that the constrained operand shapes match the shapes of the + // operands. + TF_RET_CHECK(LayoutUtil::HasLayout(custom_call->shape())); + TF_RET_CHECK(custom_call->operand_count() == + custom_call->operand_shapes_with_layout().size()); + for (int64 i = 0; i < custom_call->operand_count(); ++i) { + const Shape& operand_shape_with_layout = + custom_call->operand_shapes_with_layout()[i]; + TF_RET_CHECK(ShapeUtil::Compatible(custom_call->operand(i)->shape(), + operand_shape_with_layout)); + TF_RET_CHECK(LayoutUtil::HasLayout(operand_shape_with_layout)); + } + } + return Status::OK(); +} Status ShapeVerifier::HandleSlice(HloInstruction* slice) { + TF_RETURN_IF_ERROR(CheckOperandCount(slice, 1)); return CheckShape(slice, ShapeInference::InferSliceShape( slice->operand(0)->shape(), slice->slice_starts(), @@ -369,6 +439,7 @@ Status ShapeVerifier::HandleSlice(HloInstruction* slice) { } Status ShapeVerifier::HandleDynamicSlice(HloInstruction* dynamic_slice) { + TF_RETURN_IF_ERROR(CheckOperandCount(dynamic_slice, 2)); return CheckShape(dynamic_slice, ShapeInference::InferDynamicSliceShape( dynamic_slice->operand(0)->shape(), dynamic_slice->operand(1)->shape(), @@ -377,6 +448,7 @@ Status ShapeVerifier::HandleDynamicSlice(HloInstruction* dynamic_slice) { Status ShapeVerifier::HandleDynamicUpdateSlice( HloInstruction* dynamic_update_slice) { + TF_RETURN_IF_ERROR(CheckOperandCount(dynamic_update_slice, 3)); return CheckShape(dynamic_update_slice, ShapeInference::InferDynamicUpdateSliceShape( dynamic_update_slice->operand(0)->shape(), @@ -406,6 +478,7 @@ Status ShapeVerifier::HandleMap(HloInstruction* map) { } Status ShapeVerifier::HandleReduceWindow(HloInstruction* reduce_window) { + TF_RETURN_IF_ERROR(CheckOperandCount(reduce_window, 2)); return CheckShape( reduce_window, ShapeInference::InferReduceWindowShape( @@ -415,6 +488,7 @@ Status ShapeVerifier::HandleReduceWindow(HloInstruction* reduce_window) { } Status 
ShapeVerifier::HandleSelectAndScatter(HloInstruction* instruction) { + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 3)); return CheckShape( instruction, ShapeInference::InferSelectAndScatterShape( @@ -425,6 +499,7 @@ Status ShapeVerifier::HandleSelectAndScatter(HloInstruction* instruction) { } Status ShapeVerifier::HandleWhile(HloInstruction* xla_while) { + TF_RETURN_IF_ERROR(CheckOperandCount(xla_while, 1)); TF_RETURN_IF_ERROR( CheckOperandAndParameter(xla_while, 0, xla_while->while_body(), 0)); TF_RETURN_IF_ERROR( @@ -444,6 +519,7 @@ Status ShapeVerifier::HandleWhile(HloInstruction* xla_while) { } Status ShapeVerifier::HandleConditional(HloInstruction* conditional) { + TF_RETURN_IF_ERROR(CheckOperandCount(conditional, 3)); TF_RETURN_IF_ERROR(CheckOperandAndParameter( conditional, 1, conditional->true_computation(), 0)); TF_RETURN_IF_ERROR(CheckOperandAndParameter( @@ -458,12 +534,14 @@ Status ShapeVerifier::HandleConditional(HloInstruction* conditional) { } Status ShapeVerifier::HandlePad(HloInstruction* pad) { + TF_RETURN_IF_ERROR(CheckOperandCount(pad, 2)); return CheckShape(pad, ShapeInference::InferPadShape(pad->operand(0)->shape(), pad->operand(1)->shape(), pad->padding_config())); } Status ShapeVerifier::HandleSend(HloInstruction* send) { + TF_RETURN_IF_ERROR(CheckOperandCount(send, 2)); return CheckShape(send, ShapeUtil::MakeTupleShape({send->operand(0)->shape(), ShapeUtil::MakeShape(U32, {}), @@ -471,10 +549,12 @@ Status ShapeVerifier::HandleSend(HloInstruction* send) { } Status ShapeVerifier::HandleSendDone(HloInstruction* send_done) { + TF_RETURN_IF_ERROR(CheckOperandCount(send_done, 1)); return CheckShape(send_done, ShapeUtil::MakeTokenShape()); } Status ShapeVerifier::HandleRecv(HloInstruction* recv) { + TF_RETURN_IF_ERROR(CheckOperandCount(recv, 1)); return CheckShape( recv, ShapeUtil::MakeTupleShape( {ShapeUtil::GetTupleElementShape(recv->shape(), 0), @@ -482,6 +562,7 @@ Status ShapeVerifier::HandleRecv(HloInstruction* recv) { } Status 
ShapeVerifier::HandleRecvDone(HloInstruction* recv_done) { + TF_RETURN_IF_ERROR(CheckOperandCount(recv_done, 1)); return CheckShape( recv_done, ShapeUtil::MakeTupleShape( @@ -491,6 +572,7 @@ Status ShapeVerifier::HandleRecvDone(HloInstruction* recv_done) { Status ShapeVerifier::HandleBatchNormTraining( HloInstruction* batch_norm_training) { + TF_RETURN_IF_ERROR(CheckOperandCount(batch_norm_training, 3)); return CheckShape(batch_norm_training, ShapeInference::InferBatchNormTrainingShape( batch_norm_training->operand(0)->shape(), @@ -501,6 +583,7 @@ Status ShapeVerifier::HandleBatchNormTraining( Status ShapeVerifier::HandleBatchNormInference( HloInstruction* batch_norm_inference) { + TF_RETURN_IF_ERROR(CheckOperandCount(batch_norm_inference, 5)); return CheckShape(batch_norm_inference, ShapeInference::InferBatchNormInferenceShape( batch_norm_inference->operand(0)->shape(), @@ -512,6 +595,7 @@ Status ShapeVerifier::HandleBatchNormInference( } Status ShapeVerifier::HandleBatchNormGrad(HloInstruction* batch_norm_grad) { + TF_RETURN_IF_ERROR(CheckOperandCount(batch_norm_grad, 5)); return CheckShape(batch_norm_grad, ShapeInference::InferBatchNormGradShape( batch_norm_grad->operand(0)->shape(), batch_norm_grad->operand(1)->shape(), @@ -580,6 +664,7 @@ Status CheckMixedPrecisionOperands(const HloInstruction* instruction) { } // namespace Status ShapeVerifier::HandleGather(HloInstruction* gather) { + TF_RETURN_IF_ERROR(CheckOperandCount(gather, 2)); return CheckShape( gather, ShapeInference::InferGatherShape( @@ -588,6 +673,7 @@ Status ShapeVerifier::HandleGather(HloInstruction* gather) { } Status ShapeVerifier::HandleScatter(HloInstruction* scatter) { + TF_RETURN_IF_ERROR(CheckOperandCount(scatter, 3)); return CheckShape( scatter, ShapeInference::InferScatterShape( scatter->operand(0)->shape(), scatter->operand(1)->shape(), @@ -675,12 +761,14 @@ Status ShapeVerifier::CheckShape(const HloInstruction* instruction, } Status ShapeVerifier::CheckUnaryShape(const HloInstruction* 
instruction) { + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 1)); return CheckShape(instruction, ShapeInference::InferUnaryOpShape(instruction->opcode(), instruction->operand(0))); } Status ShapeVerifier::CheckBinaryShape(const HloInstruction* instruction) { + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 2)); return CheckShape( instruction, ShapeInference::InferBinaryOpShape(instruction->opcode(), instruction->operand(0), @@ -688,6 +776,7 @@ Status ShapeVerifier::CheckBinaryShape(const HloInstruction* instruction) { } Status ShapeVerifier::CheckTernaryShape(const HloInstruction* instruction) { + TF_RETURN_IF_ERROR(CheckOperandCount(instruction, 3)); return CheckShape(instruction, ShapeInference::InferTernaryOpShape( instruction->opcode(), instruction->operand(0), @@ -795,6 +884,56 @@ Status VerifyEntryAndExitShapes(const HloModule& module) { return Status::OK(); } +// Verifies that entry computation layout matches characteristics of +// entry computation. +Status CheckEntryComputationLayout(const HloModule& module) { + const HloComputation* computation = module.entry_computation(); + const auto& layout = module.entry_computation_layout(); + const ShapeLayout& result_layout = layout.result_layout(); + + if (LayoutUtil::IsSparseArray(result_layout.shape())) { + return Unimplemented( + "Sparse arrays are not yet fully supported in program result shape: %s", + ShapeUtil::HumanStringWithLayout(result_layout.shape())); + } + + if (!ShapeUtil::Compatible(computation->root_instruction()->shape(), + result_layout.shape())) { + return InternalError( + "Shape of the root instruction of entry computation (%s) should be " + "compatible to one specified in module's entry computation layout (%s)", + ShapeUtil::HumanString(computation->root_instruction()->shape()), + ShapeUtil::HumanString(result_layout.shape())); + } + + if (computation->num_parameters() != layout.parameter_count()) { + return InternalError( + "Number of parameters in entry computation layout (%d) must 
be same " + "as number of parameters of entry computation computation (%d)", + layout.parameter_count(), computation->num_parameters()); + } + + for (int i = 0; i < computation->num_parameters(); ++i) { + const HloInstruction* parameter = computation->parameter_instruction(i); + if (LayoutUtil::IsSparseArray(layout.parameter_shape(i))) { + return Unimplemented( + "Sparse arrays are not yet fully supported " + "in program parameter shape: %s", + ShapeUtil::HumanStringWithLayout(layout.parameter_shape(i))); + } + if (!ShapeUtil::Compatible(parameter->shape(), layout.parameter_shape(i))) { + return InternalError( + "Shape of the entry computation parameter %d is %s should be " + "compatible to the one specified in module's entry computation " + "layout %s", + i, ShapeUtil::HumanString(parameter->shape()), + ShapeUtil::HumanString(layout.parameter_shape(i))); + } + } + + return Status::OK(); +} + // Checks if the given two instructions share the same channel id. Status CheckSameChannel(const HloInstruction* instr1, const HloInstruction* instr2) { @@ -1192,6 +1331,7 @@ StatusOr HloVerifier::Run(HloModule* module) { TF_RETURN_IF_ERROR(computation->Accept(&instruction_verifier)); } + TF_RETURN_IF_ERROR(CheckEntryComputationLayout(*module)); TF_RETURN_IF_ERROR(VerifyEntryAndExitShapes(*module)); // If the module has a schedule, it must be valid. 
@@ -1199,6 +1339,8 @@ StatusOr HloVerifier::Run(HloModule* module) { TF_RETURN_IF_ERROR(module->schedule().Verify()); } + TF_RETURN_IF_ERROR(module->input_output_alias_config().Verify(*module)); + return false; } diff --git a/tensorflow/compiler/xla/service/hlo_verifier.h b/tensorflow/compiler/xla/service/hlo_verifier.h index cb49cb95ba8949b84f57d985bdb07a3177edbc5a..e1f3402465746b0478d7bb7e4ee2b66e3f839eb2 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.h +++ b/tensorflow/compiler/xla/service/hlo_verifier.h @@ -32,6 +32,8 @@ class ShapeVerifier : public DfsHloVisitor { : layout_sensitive_(layout_sensitive), allow_mixed_precision_(allow_mixed_precision) {} + Status Preprocess(HloInstruction* hlo) override; + Status HandleElementwiseUnary(HloInstruction* hlo) override; Status HandleElementwiseBinary(HloInstruction* hlo) override; Status HandleClamp(HloInstruction* clamp) override; diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.cc b/tensorflow/compiler/xla/service/interpreter/compiler.cc index 7c79eb7d791bc9a0743605d3171ff69c6ef41d58..26643667c8674c85e5d03da4c5a2d63833e1d27f 100644 --- a/tensorflow/compiler/xla/service/interpreter/compiler.cc +++ b/tensorflow/compiler/xla/service/interpreter/compiler.cc @@ -57,6 +57,12 @@ StatusOr> InterpreterCompiler::RunHloPasses( return std::move(hlo_module); } +Status InterpreterCompiler::RunHloPassesOnModuleGroup( + HloModuleGroup* module_group, se::StreamExecutor* executor, + DeviceMemoryAllocator* device_allocator) { + return Unimplemented("Module group compilation not supported on Interpreter"); +} + StatusOr> InterpreterCompiler::RunBackend( std::unique_ptr hlo_module, se::StreamExecutor* stream_exec, DeviceMemoryAllocator* /*device_allocator*/) { @@ -76,17 +82,26 @@ StatusOr> InterpreterCompiler::RunBackend( return std::move(executable); } +StatusOr>> +InterpreterCompiler::RunBackendOnModuleGroup( + std::unique_ptr module_group, + std::vector> stream_exec, + DeviceMemoryAllocator* 
device_allocator) { + return Unimplemented( + "Module group compilation is not supported on Interpreter."); +} + StatusOr>> InterpreterCompiler::Compile( - std::vector> /*hlo_modules*/, + std::unique_ptr /*module_group*/, std::vector> /*stream_execs*/, DeviceMemoryAllocator* /*device_allocator*/) { - return tensorflow::errors::Unimplemented( - "Compilation of multiple HLO modules is not supported on Interpreter."); + return Unimplemented( + "Module group compilation is not supported on Interpreter."); } StatusOr>> InterpreterCompiler::CompileAheadOfTime( - std::vector> hlo_modules, + std::unique_ptr module_group, const AotCompilationOptions& aot_options) { return tensorflow::errors::InvalidArgument( "AOT compilation not supported on Interpreter"); diff --git a/tensorflow/compiler/xla/service/interpreter/compiler.h b/tensorflow/compiler/xla/service/interpreter/compiler.h index e90ae3e818522e6e4fd9d9f5acb846800bc899ca..d8cb32c0beb279ae6484b1b8f5f99085c2d67c67 100644 --- a/tensorflow/compiler/xla/service/interpreter/compiler.h +++ b/tensorflow/compiler/xla/service/interpreter/compiler.h @@ -46,18 +46,25 @@ class InterpreterCompiler : public Compiler { StatusOr> RunHloPasses( std::unique_ptr hlo_module, se::StreamExecutor* stream_exec, DeviceMemoryAllocator* device_allocator) override; + Status RunHloPassesOnModuleGroup( + HloModuleGroup* module_group, se::StreamExecutor* executor, + DeviceMemoryAllocator* device_allocator) override; StatusOr> RunBackend( std::unique_ptr hlo_module, se::StreamExecutor* stream_exec, DeviceMemoryAllocator* device_allocator) override; + StatusOr>> RunBackendOnModuleGroup( + std::unique_ptr module_group, + std::vector> stream_exec, + DeviceMemoryAllocator* device_allocator) override; StatusOr>> Compile( - std::vector> hlo_modules, + std::unique_ptr module_group, std::vector> stream_exec, DeviceMemoryAllocator* device_allocator) override; StatusOr>> - CompileAheadOfTime(std::vector> hlo_modules, + CompileAheadOfTime(std::unique_ptr 
module_group, const AotCompilationOptions& aot_options) override; HloCostAnalysis::ShapeSizeFunction ShapeSizeBytesFunction() const override; diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index cc4a342e9d38415599256a5eaf3f5cf757652659..232d1dc0879cd6931158e642e01fe68e43e6c655 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -419,6 +419,16 @@ Status LayoutAssignment::BuildHostChannelConstraints( return Status::OK(); } +namespace { + +bool IsLayoutConstrainedCustomCall(HloInstruction* instruction) { + const HloCustomCallInstruction* custom_call = + DynCast(instruction); + return custom_call != nullptr && custom_call->layout_constrained(); +} + +} // namespace + Status LayoutAssignment::AddMandatoryConstraints( const ComputationLayout* computation_layout, ChannelLayoutConstraints* channel_constraints, HloComputation* computation, @@ -434,7 +444,6 @@ Status LayoutAssignment::AddMandatoryConstraints( // Constrain layouts of instructions which define values with pre-existing // layouts. for (auto* instruction : computation->instructions()) { - Shape const* shape_with_layout = nullptr; if (instruction->opcode() == HloOpcode::kInfeed) { // Infeed layouts must match the layout of the original inserted // instruction. @@ -456,17 +465,21 @@ Status LayoutAssignment::AddMandatoryConstraints( if (parameter_layout.LayoutIsSet()) { // Parameter layouts must match the respective layout in // ComputationLayout, if there is one. 
- shape_with_layout = ¶meter_layout.shape(); + TF_RETURN_IF_ERROR(constraints->SetInstructionLayout( + parameter_layout.shape(), instruction)); } } - } - if (shape_with_layout != nullptr) { + } else if (IsLayoutConstrainedCustomCall(instruction)) { + const HloCustomCallInstruction* custom_call = + DynCast(instruction); TF_RETURN_IF_ERROR( - constraints->SetInstructionLayout(*shape_with_layout, instruction)); - } - - if (instruction->opcode() == HloOpcode::kSend || - instruction->opcode() == HloOpcode::kRecv) { + constraints->SetInstructionLayout(custom_call->shape(), custom_call)); + for (int64 i = 0; i < custom_call->operand_count(); ++i) { + TF_RETURN_IF_ERROR(constraints->SetOperandLayout( + custom_call->operand_shapes_with_layout()[i], custom_call, i)); + } + } else if (instruction->opcode() == HloOpcode::kSend || + instruction->opcode() == HloOpcode::kRecv) { CHECK(get_channel_constraints(instruction)) << "Multi-module layout assignment requires ChannelLayoutConstraints"; int64 channel_id = instruction->channel_id(); @@ -621,31 +634,6 @@ Status LayoutAssignment::AddMandatoryConstraints( TF_RETURN_IF_ERROR(constraints->SetOperandLayout( false_computation_layout.parameter_shape(0), instruction, 2, /*mandatory=*/true)); - } else if (instruction->opcode() == HloOpcode::kCustomCall) { - if (!CustomCallRequiresMajorFirstLayout(instruction)) { - continue; - } - // Add constraints for kCustomCall instruction operands and instructions. - // For now we only support major-first layouts for all inputs and outputs. - Shape result_shape = ShapeUtil::MakeShapeWithDescendingLayout( - instruction->shape().element_type(), - AsInt64Slice(instruction->shape().dimensions())); - TF_RETURN_IF_ERROR( - constraints->SetInstructionLayout(result_shape, instruction)); - for (int64 i = 0; i < instruction->operand_count(); ++i) { - const Shape& operand_shape = instruction->operand(i)->shape(); - // Opaque operands don't get a layout constraint. 
- if (ShapeUtil::IsOpaque(operand_shape)) { - continue; - } - - Shape row_major_operand_shape = - ShapeUtil::MakeShapeWithDescendingLayout( - operand_shape.element_type(), - AsInt64Slice(operand_shape.dimensions())); - TF_RETURN_IF_ERROR(constraints->SetOperandLayout( - row_major_operand_shape, instruction, i)); - } } } // Finally set the result layout to match ComputationLayout, if there is one. @@ -676,16 +664,18 @@ Status CheckCallLayout(HloInstruction* call, return Status::OK(); } -// Custom calls have fixed input and output layouts. -Status CheckCustomCallLayout(HloInstruction* custom_call) { - for (const HloInstruction* operand : custom_call->operands()) { - TF_RET_CHECK( - ShapeUtil::IsOpaque(operand->shape()) || - LayoutUtil::IsMonotonicWithDim0Major(operand->shape().layout())); +// Operands of layout-constrained custom calls must match the expected +// constrained layouts. +Status CheckCustomCallLayout(HloInstruction* instruction) { + if (IsLayoutConstrainedCustomCall(instruction)) { + const HloCustomCallInstruction* custom_call = + DynCast(instruction); + for (int64 i = 0; i < custom_call->operand_count(); ++i) { + TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual( + custom_call->operand(i)->shape(), + custom_call->operand_shapes_with_layout()[i])); + } } - TF_RET_CHECK( - ShapeUtil::IsOpaque(custom_call->shape()) || - LayoutUtil::IsMonotonicWithDim0Major(custom_call->shape().layout())); return Status::OK(); } @@ -932,9 +922,7 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) { FindOrDie(computation_layouts_, instruction->to_apply()))); break; case HloOpcode::kCustomCall: - if (CustomCallRequiresMajorFirstLayout(instruction)) { - TF_RETURN_IF_ERROR(CheckCustomCallLayout(instruction)); - } + TF_RETURN_IF_ERROR(CheckCustomCallLayout(instruction)); break; case HloOpcode::kFusion: TF_RETURN_IF_ERROR(CheckFusionLayout(instruction)); @@ -971,9 +959,8 @@ Status LayoutAssignment::CheckLayouts(HloModule* module) { FindOrDie(computation_layouts_, 
module->entry_computation()) .result_layout(); if (result_layout.LayoutIsSet()) { - TF_RET_CHECK(ShapeUtil::Equal( - module->entry_computation()->root_instruction()->shape(), - result_layout.shape())); + TF_RET_CHECK( + ShapeUtil::Equal(module->result_shape(), result_layout.shape())); } return Status::OK(); } @@ -1536,6 +1523,10 @@ Status LayoutAssignment::AssignLayouts(const LayoutConstraints& constraints, // Execute extra verification step once the layout has been finalized. TF_RETURN_IF_ERROR(Verify(instruction)); + // Shape must be valid. + TF_RETURN_IF_ERROR( + ShapeUtil::ValidateShapeWithOptionalLayout(instruction->shape())); + // Verify all layouts in the shape have been set. TF_RET_CHECK(LayoutUtil::HasLayout(instruction->shape())); } @@ -1554,11 +1545,11 @@ Status LayoutAssignment::CalculateComputationLayout( Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) { // Clear existing layouts of the instructions. All layouts must be assigned - // by the LayoutAssignment pass, except for those on infeeds, parameters, - // and the computation result. The latter two are specified in - // computation_layout, so we only need to keep the existing layouts for - // infeeds. Clearing the layouts here avoids hiding potential bugs in the - // layout assignment pass that may accidentally use the existing layout. + // by the LayoutAssignment pass, except for those on parameters, the + // computation result, and a couple special cases. The former two are + // specified in computation_layout. Clearing the layouts here avoids hiding + // potential bugs in the layout assignment pass that may accidentally use the + // existing layout. 
for (HloInstruction* instruction : computation->instructions()) { if (instruction->opcode() == HloOpcode::kBitcast) { // bitcasts are inherently layout sensitive and so a bitcast instruction @@ -1567,7 +1558,9 @@ Status LayoutAssignment::ClearComputationLayouts(HloComputation* computation) { "Unexpected bitcast operation seen during layout assignment: %s.", instruction->ToString()); } - if (instruction->opcode() != HloOpcode::kInfeed) { + // Some instructions carry mandatory layouts in their shape. + if (instruction->opcode() != HloOpcode::kInfeed && + !IsLayoutConstrainedCustomCall(instruction)) { LayoutUtil::ClearLayout(instruction->mutable_shape()); } } @@ -1802,6 +1795,18 @@ StatusOr LayoutAssignment::Run(HloModule* module) { } TF_RETURN_IF_ERROR(Init()); + // Verify computation layout is sane. + const HloComputation* entry = module->entry_computation(); + TF_RET_CHECK(entry_computation_layout_->parameter_count() == + entry->num_parameters()); + for (int64 i = 0; i < entry->num_parameters(); ++i) { + TF_RET_CHECK( + ShapeUtil::Compatible(entry_computation_layout_->parameter_shape(i), + entry->parameter_instruction(i)->shape())); + } + TF_RET_CHECK(ShapeUtil::Compatible(entry_computation_layout_->result_shape(), + entry->root_instruction()->shape())); + // We do two passes. 
The first one we pass a nullptr ComputationLayout to // the RunOnComputation() calls (for non entry computations), and we register // the ComputationLayout which are naturally flowing in DFS fashion to the @@ -1873,7 +1878,6 @@ bool LayoutAssignment::InstructionCanChangeLayout( case HloOpcode::kCrossReplicaSum: case HloOpcode::kAllToAll: case HloOpcode::kCollectivePermute: - case HloOpcode::kCustomCall: case HloOpcode::kDivide: case HloOpcode::kDynamicSlice: case HloOpcode::kDynamicUpdateSlice: @@ -1907,6 +1911,7 @@ bool LayoutAssignment::InstructionCanChangeLayout( case HloOpcode::kRemainder: case HloOpcode::kReverse: case HloOpcode::kRoundNearestAfz: + case HloOpcode::kScatter: case HloOpcode::kSelect: case HloOpcode::kSelectAndScatter: case HloOpcode::kShiftLeft: @@ -1930,6 +1935,7 @@ bool LayoutAssignment::InstructionCanChangeLayout( case HloOpcode::kConstant: case HloOpcode::kConvolution: case HloOpcode::kCopy: + case HloOpcode::kCustomCall: case HloOpcode::kDomain: case HloOpcode::kDot: case HloOpcode::kFusion: @@ -1944,7 +1950,6 @@ bool LayoutAssignment::InstructionCanChangeLayout( case HloOpcode::kReduce: case HloOpcode::kReshape: case HloOpcode::kRng: - case HloOpcode::kScatter: case HloOpcode::kSend: case HloOpcode::kSendDone: case HloOpcode::kAfterAll: diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h index 2d48e122637c080fc2bcf7bce1c2a2521f51e41f..cb56f4cd19ded036ef521a579eb7d6ea7f3b6268 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.h +++ b/tensorflow/compiler/xla/service/layout_assignment.h @@ -333,19 +333,6 @@ class LayoutAssignment : public HloModulePass { const ResultLayoutConstraint& layout_constraint, LayoutConstraints* constraints); - // By default LayoutAssignment ensures that inputs and outputs of CustomCalls - // have the "major-first" layout (i.e. {n, n-1, ..., 0}). 
- // - // If this function returns true, LayoutAssignment does not set a layout for - // the given CustomCall. It's up to the backend to set one in - // AddBackendConstraints, if necessary. - // - // Precondition: instruction->opcode() == HloOpcode::kCustomCall. - virtual bool CustomCallRequiresMajorFirstLayout( - const HloInstruction* /*instruction*/) { - return true; - } - // Called after layouts of an instruction have been finalized to allow // subclasses to check for platform specific assumptions. virtual Status Verify(const HloInstruction* instruction) { diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index 2c549cd872b35e55cc00527b6579f79d8516b66c..a831751fa96f8cef233e16fe02378ac036efc8ab 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -65,6 +65,27 @@ class LayoutAssignmentTest : public HloVerifiedTestBase { FindInstruction(module, name)->shape().layout().minor_to_major(); return std::vector(minor_to_major.begin(), minor_to_major.end()); } + + void ExpectLayoutIs(const Shape& shape, + absl::Span minor_to_major) { + const Layout expected = LayoutUtil::MakeLayout(minor_to_major); + EXPECT_TRUE(LayoutUtil::Equal(shape.layout(), expected)) + << "Expected layout " << expected << ", actual " << shape.layout(); + } + + void ExpectTupleLayoutIs( + const Shape& shape, + std::initializer_list> minor_to_majors) { + int i = 0; + for (const absl::Span minor_to_major : minor_to_majors) { + const Layout expected = LayoutUtil::MakeLayout(minor_to_major); + const Layout& actual = ShapeUtil::GetTupleElementShape(shape, i).layout(); + EXPECT_TRUE(LayoutUtil::Equal(actual, expected)) + << "Expected tuple element " << i << " layout " << expected + << ", actual " << actual; + ++i; + } + } }; TEST_F(LayoutAssignmentTest, ComputationLayout) { @@ -1102,5 +1123,173 @@ TEST_F(LayoutAssignmentTest, 
TupleCopyOnLayoutMismatch) { EXPECT_THAT(LayoutOf(&module(), "next_buf"), ElementsAre(1, 0)); } +TEST_F(LayoutAssignmentTest, CustomCallNotLayoutConstrained) { + const char* module_str = R"( +HloModule CustomCallNotLayoutConstrained + +ENTRY %CustomCallWithNotLayoutConstrained (p: f32[42,2,3]) -> f32[1,2,3,4] { + %p = f32[42,2,3] parameter(0) + ROOT %custom-call = f32[1,2,3,4] custom-call(f32[42,2,3] %p), custom_call_target="baz" +} +)"; + // Try with a couple different layouts. In each case the custom calls operand + // and result layout should match that of the computation. + { + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {42, 2, 3}, {0, 2, 1})); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {3, 2, 0, 1})); + AssignLayouts(module.get(), &computation_layout); + + HloInstruction* root = module->entry_computation()->root_instruction(); + ASSERT_THAT(root, op::CustomCall(op::Parameter())); + ExpectLayoutIs(root->shape(), {3, 2, 0, 1}); + ExpectLayoutIs(root->operand(0)->shape(), {0, 2, 1}); + } + { + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {42, 2, 3}, {0, 1, 2})); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {0, 2, 3, 1})); + AssignLayouts(module.get(), &computation_layout); + + HloInstruction* root = module->entry_computation()->root_instruction(); + ASSERT_THAT(root, op::CustomCall(op::Parameter())); + 
ExpectLayoutIs(root->shape(), {0, 2, 3, 1}); + ExpectLayoutIs(root->operand(0)->shape(), {0, 1, 2}); + } +} + +TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrained) { + const char* module_str = R"( +HloModule CustomCallLayoutConstrained + +ENTRY %CustomCallWithLayoutConstraints (p0: f32[4,4], p1: f32[2,3]) -> f32[1,2,3,4] { + %p0 = f32[4,4] parameter(0) + %p1 = f32[2,3] parameter(1) + ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(f32[4,4] %p0, f32[2,3] %p1), custom_call_target="baz", operand_layout_constraints={f32[4,4]{0,1}, f32[2,3]{1,0}} +} +)"; + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0})); + *computation_layout.mutable_parameter_layout(1) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0})); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3})); + AssignLayouts(module.get(), &computation_layout); + + // The custom call should be partially encapsulated in kCopy instructions + // because of the layout mismatches. 
+ ASSERT_THAT(module->entry_computation()->root_instruction(), + op::Copy(op::CustomCall(op::Copy(), op::Parameter()))); + + const HloInstruction* custom_call = + module->entry_computation()->root_instruction()->operand(0); + ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1}); + ExpectLayoutIs(custom_call->operand(0)->shape(), {0, 1}); + ExpectLayoutIs(custom_call->operand(1)->shape(), {1, 0}); +} + +TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedZeroOperands) { + const char* module_str = R"( +HloModule CustomCallLayoutConstrainedZeroOperands + +ENTRY %CustomCallLayoutConstrainedZeroOperands () -> f32[1,2,3,4] { + ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(), custom_call_target="baz", operand_layout_constraints={} +} +)"; + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3})); + AssignLayouts(module.get(), &computation_layout); + + ASSERT_THAT(module->entry_computation()->root_instruction(), + op::Copy(op::CustomCall())); + + const HloInstruction* custom_call = + module->entry_computation()->root_instruction()->operand(0); + ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1}); +} + +TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedTupleOperand) { + const char* module_str = R"( +HloModule CustomCallLayoutConstrainedTupleOperand + +ENTRY %CustomCallLayoutConstrainedTupleOperand (p0: f32[4,4], p1: f32[2,3]) -> f32[1,2,3,4] { + %p0 = f32[4,4] parameter(0) + %p1 = f32[2,3] parameter(1) + %tuple = (f32[4,4], f32[2,3]) tuple(%p0, %p1) + ROOT %custom-call = f32[1,2,3,4]{3,2,0,1} custom-call(%tuple), custom_call_target="baz", operand_layout_constraints={(f32[4,4]{1,0}, f32[2,3]{0,1})} +} +)"; + TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + 
ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0})); + *computation_layout.mutable_parameter_layout(1) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0})); + *computation_layout.mutable_result_layout() = ShapeLayout( + ShapeUtil::MakeShapeWithLayout(F32, {1, 2, 3, 4}, {2, 1, 0, 3})); + AssignLayouts(module.get(), &computation_layout); + + HloInstruction* root = module->entry_computation()->root_instruction(); + ExpectLayoutIs(root->shape(), {2, 1, 0, 3}); + + ASSERT_THAT(module->entry_computation()->root_instruction(), + op::Copy(op::CustomCall(op::Tuple()))); + + const HloInstruction* custom_call = + module->entry_computation()->root_instruction()->operand(0); + ExpectLayoutIs(custom_call->shape(), {3, 2, 0, 1}); + ExpectTupleLayoutIs(custom_call->operand(0)->shape(), {{1, 0}, {0, 1}}); +} + +TEST_F(LayoutAssignmentTest, CustomCallLayoutConstrainedTupleResult) { + const char* module_str = R"( +HloModule CustomCallLayoutConstrainedTupleResult + +ENTRY %CustomCallLayoutConstrainedTupleResult (p0: f32[4,4]) -> (f32[4,4]{1,0}, f32[2,3]{0,1}) { + %p0 = f32[4,4] parameter(0) + ROOT %custom-call = (f32[4,4]{1,0}, f32[2,3]{0,1}) custom-call(%p0), custom_call_target="baz", operand_layout_constraints={f32[4,4]{1,0}} +} +)"; + // Try with a couple different layouts. In each case the custom calls operand + // and result layout should match that of the computation. 
+ TF_ASSERT_OK_AND_ASSIGN( + std::unique_ptr module, + ParseAndReturnVerifiedModule(module_str, GetModuleConfigForTest())); + ComputationLayout computation_layout = module->entry_computation_layout(); + *computation_layout.mutable_parameter_layout(0) = + ShapeLayout(ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0})); + *computation_layout.mutable_result_layout() = + ShapeLayout(ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShapeWithLayout(F32, {4, 4}, {1, 0}), + ShapeUtil::MakeShapeWithLayout(F32, {2, 3}, {1, 0})})); + AssignLayouts(module.get(), &computation_layout); + + ExpectTupleLayoutIs(module->result_shape(), {{1, 0}, {1, 0}}); + + const HloInstruction* custom_call = + FindInstruction(module.get(), "custom-call"); + ExpectTupleLayoutIs(custom_call->shape(), {{1, 0}, {0, 1}}); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/llvm_compiler.cc b/tensorflow/compiler/xla/service/llvm_compiler.cc index b17c9d504501a907e27d5152e0082799e87443c7..d287aa4ec7bbcd11f51ea07cd2a1572e59f0d6c6 100644 --- a/tensorflow/compiler/xla/service/llvm_compiler.cc +++ b/tensorflow/compiler/xla/service/llvm_compiler.cc @@ -21,8 +21,24 @@ limitations under the License. 
#endif namespace xla { +Status LLVMCompiler::RunHloPassesOnModuleGroup( + HloModuleGroup* module_group, se::StreamExecutor* executor, + DeviceMemoryAllocator* device_allocator) { + return Unimplemented( + "Model partitioning not implemented for the CPU/GPU compilers!"); +} + +StatusOr>> +LLVMCompiler::RunBackendOnModuleGroup( + std::unique_ptr module_group, + std::vector> stream_exec, + DeviceMemoryAllocator* device_allocator) { + return Unimplemented( + "Model partitioning not implemented for the CPU/GPU compilers!"); +} + StatusOr>> LLVMCompiler::Compile( - std::vector> modules, + std::unique_ptr module_group, std::vector> stream_execs, DeviceMemoryAllocator* device_allocator) { // Tensorflow tries to enable the following behaviors in all its threads: @@ -38,6 +54,8 @@ StatusOr>> LLVMCompiler::Compile( tensorflow::port::ScopedDontFlushDenormal dont_flush_denormals; std::vector> result; + std::vector> modules = + module_group->ConsumeModules(); for (size_t i = 0; i < modules.size(); i++) { if (stream_execs[i].size() != 1) { return Unimplemented( diff --git a/tensorflow/compiler/xla/service/llvm_compiler.h b/tensorflow/compiler/xla/service/llvm_compiler.h index f1c623508c5307f2b1c036d3ec6823b75c7eda13..86abd5da0189feb0eadfde3d6dbab446eb2be900 100644 --- a/tensorflow/compiler/xla/service/llvm_compiler.h +++ b/tensorflow/compiler/xla/service/llvm_compiler.h @@ -69,8 +69,17 @@ class LLVMCompiler : public Compiler { using Compiler::RunBackend; using Compiler::RunHloPasses; + Status RunHloPassesOnModuleGroup( + HloModuleGroup* module_group, se::StreamExecutor* executor, + DeviceMemoryAllocator* device_allocator) override; + + StatusOr>> RunBackendOnModuleGroup( + std::unique_ptr module_group, + std::vector> stream_exec, + DeviceMemoryAllocator* device_allocator) override; + StatusOr>> Compile( - std::vector> modules, + std::unique_ptr module_group, std::vector> stream_execs, DeviceMemoryAllocator* device_allocator) override; diff --git 
a/tensorflow/compiler/xla/service/llvm_ir/BUILD b/tensorflow/compiler/xla/service/llvm_ir/BUILD index 6223a34b1258961944a3ac64cd10876d1272c94e..5f7ad81d82978d0a752b33d12b72e16f0c1c6826 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/BUILD +++ b/tensorflow/compiler/xla/service/llvm_ir/BUILD @@ -204,7 +204,6 @@ cc_library( "//tensorflow/compiler/xla/service/gpu:partition_assignment", "//tensorflow/core:lib", "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:optional", "@llvm//:core", "@llvm//:support", ], diff --git a/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc b/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc index 944c79580c133906cd431722fd6b29e6aee5f918..05ba4a40da413f0e774214e55ef69d023afc48e2 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/sort_util.cc @@ -15,9 +15,10 @@ limitations under the License. #include "tensorflow/compiler/xla/service/llvm_ir/sort_util.h" +#include + // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc" #include "absl/strings/string_view.h" -#include "absl/types/optional.h" #include "llvm/ADT/APInt.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -43,7 +44,7 @@ namespace { void EmitCompareLoop(int64 dimension_to_sort, const IrArray::Index& keys_index, const IrArray::Index& compare_keys_index, const IrArray& keys_array, - const absl::optional& values_array, + const std::vector& values_arrays, llvm::IRBuilder<>* b) { // if (is_smaller_index && // compare_keys[dimension_to_sort] < dimension_to_sort_bound) @@ -100,19 +101,18 @@ void EmitCompareLoop(int64 dimension_to_sort, const IrArray::Index& keys_index, // Swap key1 with key2. keys_array.EmitWriteArrayElement(keys_index, key2, b); keys_array.EmitWriteArrayElement(compare_keys_index, key1, b); - if (values_array.has_value()) { + for (const auto& values_array : values_arrays) { // Also swap the values. 
- auto value1 = values_array.value().EmitReadArrayElement(keys_index, b); - auto value2 = - values_array.value().EmitReadArrayElement(compare_keys_index, b); - values_array.value().EmitWriteArrayElement(keys_index, value2, b); - values_array.value().EmitWriteArrayElement(compare_keys_index, value1, b); + auto value1 = values_array.EmitReadArrayElement(keys_index, b); + auto value2 = values_array.EmitReadArrayElement(compare_keys_index, b); + values_array.EmitWriteArrayElement(keys_index, value2, b); + values_array.EmitWriteArrayElement(compare_keys_index, value1, b); } } } // namespace Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array, - const absl::optional& values_array, + const std::vector& values_arrays, absl::string_view name, llvm::Value* xor_mask, llvm::IRBuilder<>* b, const gpu::LaunchDimensions* launch_dimensions) { @@ -162,7 +162,7 @@ Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array, compare_keys_index[dimension_to_sort] = b->CreateXor(compare_index[0], xor_mask); EmitCompareLoop(dimension_to_sort, keys_index, compare_keys_index, - keys_array, values_array, b); + keys_array, values_arrays, b); return Status::OK(); }; if (launch_dimensions != nullptr) { diff --git a/tensorflow/compiler/xla/service/llvm_ir/sort_util.h b/tensorflow/compiler/xla/service/llvm_ir/sort_util.h index 527ed10374ce9482045a8459e38fd041e0e83001..2f3bcda2307bcbb35a03b9e71dbbe44e366b3820 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/sort_util.h +++ b/tensorflow/compiler/xla/service/llvm_ir/sort_util.h @@ -16,8 +16,9 @@ limitations under the License. 
#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_SORT_UTIL_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_SORT_UTIL_H_ +#include + #include "absl/strings/string_view.h" -#include "absl/types/optional.h" #include "llvm/IR/Value.h" #include "tensorflow/compiler/xla/service/gpu/partition_assignment.h" #include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h" @@ -31,7 +32,7 @@ namespace llvm_ir { // implements the inner loop of BitonicSort. If 'launch_dimensions' is nullptr, // the inner compare loop will not be parallelized. Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array, - const absl::optional& values_array, + const std::vector& values_arrays, absl::string_view name, llvm::Value* xor_mask, llvm::IRBuilder<>* b, const gpu::LaunchDimensions* launch_dimensions); diff --git a/tensorflow/compiler/xla/service/local_service.cc b/tensorflow/compiler/xla/service/local_service.cc index 0d0fb7946ae6815905491ca55652d7d0ab278a3c..cca37556173bb95ef062b59ab0a4bf9ca7c496fe 100644 --- a/tensorflow/compiler/xla/service/local_service.cc +++ b/tensorflow/compiler/xla/service/local_service.cc @@ -144,8 +144,8 @@ StatusOr> LocalService::CompileExecutable( const absl::Span argument_layouts, const ExecutableBuildOptions& build_options) { const HloModuleProto& proto = computation.proto(); - TF_RET_CHECK(proto.has_program_shape()); - const ProgramShape& program_shape = proto.program_shape(); + TF_RET_CHECK(proto.has_host_program_shape()); + const ProgramShape& program_shape = proto.host_program_shape(); // Validate incoming layouts. 
if (argument_layouts.size() != program_shape.parameters_size()) { diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index b27a92f2a0761a2bccd97eb2c0467ead27565c37..75465359f8f37e56369c0976ba7434e3c3f202cc 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -207,7 +207,7 @@ Status Service::ValidateResultShape(const Shape& client_shape, StatusOr>> Service::ResolveAndValidateArguments( absl::Span arguments, - absl::Span stream_executors) { + absl::Span stream_executors) const { CHECK_EQ(options_.number_of_replicas(), stream_executors.size()); std::vector> replicated_arguments; replicated_arguments.resize(options_.number_of_replicas()); @@ -341,19 +341,19 @@ StatusOr>> Service::BuildExecutables( } CHECK_EQ(module_protos.size(), module_configs.size()); - std::vector> modules; + auto module_group = + absl::make_unique(module_protos[0]->name()); for (int64 i = 0; i < module_protos.size(); ++i) { const HloModuleProto* proto = module_protos[i]; const HloModuleConfig& config = *module_configs[i]; - TF_ASSIGN_OR_RETURN(auto module, - HloModule::CreateFromProto(*proto, config)); - modules.push_back(std::move(module)); + TF_ASSIGN_OR_RETURN(auto module, CreateModuleFromProto(*proto, config)); + module_group->push_back(std::move(module)); } TF_ASSIGN_OR_RETURN( std::vector> executables, - backend->compiler()->Compile(std::move(modules), std::move(executors), - device_allocator)); + backend->compiler()->Compile(std::move(module_group), + std::move(executors), device_allocator)); for (size_t i = 0; i < module_protos.size(); ++i) { if (!module_configs[i]->debug_options().xla_dump_executions_to().empty()) { @@ -590,7 +590,7 @@ StatusOr> Service::GetExecutors( StatusOr>> Service::GetArguments( const ExecutionOptions& execution_options, - absl::Span arguments) { + absl::Span arguments) const { // Resolve the allocations for the arguments of the computation, and create // a vector 
of device memory offsets for the arguments from the allocations. // In the case of partitioned computations, assume all arguments go on the @@ -634,7 +634,7 @@ Status Service::ExecuteGraphParallel(const ExecuteGraphParallelRequest* arg, arg->requests(i).execution_options(); const ExecuteGraphRequest& request = arg->requests(i); TF_RET_CHECK(request.has_computation()) << "computations may not be empty"; - TF_RET_CHECK(request.computation().has_program_shape()) + TF_RET_CHECK(request.computation().has_host_program_shape()) << "programe shape may not be empty"; // Get the executors. @@ -651,7 +651,7 @@ Status Service::ExecuteGraphParallel(const ExecuteGraphParallelRequest* arg, // replica 0. TF_ASSIGN_OR_RETURN( std::unique_ptr module_config, - CreateModuleConfig(request.computation().program_shape(), + CreateModuleConfig(request.computation().host_program_shape(), replicated_arguments.front(), request.execution_options())); VLOG(3) @@ -810,7 +810,7 @@ StatusOr> Service::BuildExecutable( } TF_ASSIGN_OR_RETURN(std::unique_ptr module, - HloModule::CreateFromProto(module_proto, *module_config)); + CreateModuleFromProto(module_proto, *module_config)); TF_RETURN_IF_ERROR(MaybeDumpUnoptimizedHloModule(*module)); @@ -836,7 +836,7 @@ Status Service::ExecuteGraph(const ExecuteGraphRequest* arg, if (!arg->has_computation()) { return InvalidArgument("computations may not be empty"); } - if (!arg->computation().has_program_shape()) { + if (!arg->computation().has_host_program_shape()) { return InvalidArgument("programe shape may not be empty"); } @@ -851,10 +851,11 @@ Status Service::ExecuteGraph(const ExecuteGraphRequest* arg, std::vector> replicated_arguments, ResolveAndValidateArguments(arg->arguments(), replicas)); - TF_ASSIGN_OR_RETURN(std::unique_ptr module_config, - CreateModuleConfig(arg->computation().program_shape(), - replicated_arguments.front(), - arg->execution_options())); + TF_ASSIGN_OR_RETURN( + std::unique_ptr module_config, + 
CreateModuleConfig(arg->computation().host_program_shape(), + replicated_arguments.front(), + arg->execution_options())); TF_ASSIGN_OR_RETURN( std::unique_ptr executable, @@ -1063,15 +1064,15 @@ Status Service::ComputeConstantGraph(const ComputeConstantGraphRequest* arg, if (!arg->has_computation()) { return InvalidArgument("computations may not be empty"); } - if (!arg->computation().has_program_shape()) { + if (!arg->computation().has_host_program_shape()) { return InvalidArgument("program shape may not be empty"); } - if (arg->computation().program_shape().parameters_size() != 0) { + if (arg->computation().host_program_shape().parameters_size() != 0) { return InvalidArgument( "constant computation may not depend on any parameters."); } - ProgramShape program_shape = arg->computation().program_shape(); + ProgramShape program_shape = arg->computation().host_program_shape(); TF_DCHECK_OK(ShapeUtil::ValidateShape(program_shape.result())); if (arg->has_output_layout()) { TF_RETURN_IF_ERROR(LayoutUtil::ValidateLayoutForShape( @@ -1081,7 +1082,7 @@ Status Service::ComputeConstantGraph(const ComputeConstantGraphRequest* arg, HloModuleConfig config(program_shape); TF_ASSIGN_OR_RETURN(std::unique_ptr module, - HloModule::CreateFromProto(arg->computation(), config)); + CreateModuleFromProto(arg->computation(), config)); HloEvaluator evaluator; TF_ASSIGN_OR_RETURN(auto result_literal, evaluator.Evaluate( @@ -1111,14 +1112,14 @@ Status Service::GetComputationGraphStats( if (!arg->has_computation()) { return InvalidArgument("Computations may not be empty."); } - if (!arg->computation().has_program_shape()) { + if (!arg->computation().has_host_program_shape()) { return InvalidArgument("Program shape may not be empty."); } - HloModuleConfig config(arg->computation().program_shape()); + HloModuleConfig config(arg->computation().host_program_shape()); config.set_debug_options(arg->debug_options()); TF_ASSIGN_OR_RETURN(std::unique_ptr module, - 
HloModule::CreateFromProto(arg->computation(), config)); + CreateModuleFromProto(arg->computation(), config)); hlo_graph_dumper::MaybeDumpHloModule(*module, "computation statistics subject"); diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index 1f62fad4c8079eba7013b3f647fe19bbc031fc77..8cf1a7b9f01fbb3572c6849c8b18e14174ced89f 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -191,7 +191,7 @@ class Service : public ServiceInterface { // Prepare the arguments for executing parallel. StatusOr>> GetArguments( const ExecutionOptions& execution_options, - absl::Span arguments); + absl::Span arguments) const; protected: friend class LocalExecutable; @@ -208,7 +208,7 @@ class Service : public ServiceInterface { StatusOr>> ResolveAndValidateArguments( absl::Span arguments, - absl::Span stream_executors); + absl::Span stream_executors) const; // Create a Hlo module config for the given program shape and arguments. // execution_options is optional; if not given a default is used. 
diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index e379911462f1d2caa53f708a6ebf8b7363dc2fc3..25afc23e5b41468ad5dd1abed076e399cf20f350 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -919,6 +919,9 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, switch (opcode) { case HloOpcode::kMaximum: case HloOpcode::kMinimum: + return InferElementwiseBinaryOpShape(opcode, lhs, rhs, + broadcast_dimensions); + case HloOpcode::kSubtract: case HloOpcode::kAdd: case HloOpcode::kAtan2: @@ -929,6 +932,12 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, case HloOpcode::kShiftLeft: case HloOpcode::kShiftRightArithmetic: case HloOpcode::kShiftRightLogical: + if (lhs.element_type() == PRED || rhs.element_type() == PRED) { + return InvalidArgument( + "Expected element type in shape to be arithmetic type for " + "operation %s; got PRED.", + HloOpcodeString(opcode)); + } return InferElementwiseBinaryOpShape(opcode, lhs, rhs, broadcast_dimensions); @@ -1029,17 +1038,22 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(HloOpcode operation, case HloOpcode::kSort: { if (operand_shapes.size() == 1) { return *operand_shapes[0]; - } else if (operand_shapes.size() == 2) { - if (!ShapeUtil::SameDimensions(*operand_shapes[0], - *operand_shapes[1])) { - return InvalidArgument( - "Sort keys and values dimensions must match. " - "Keys shape is: %s\n, Values shape is: %s", - ShapeUtil::HumanString(*operand_shapes[0]), - ShapeUtil::HumanString(*operand_shapes[1])); + } else { + for (int64 operand = 1; operand < operand_shapes.size(); ++operand) { + if (!ShapeUtil::SameDimensions(*operand_shapes[0], + *operand_shapes[operand])) { + return InvalidArgument( + "Sort keys and values dimensions must match. 
" + "Keys shape is: %s\n, Values shape (operand index %lld) is: %s", + ShapeUtil::HumanString(*operand_shapes[0]), operand, + ShapeUtil::HumanString(*operand_shapes[operand])); + } + } + std::vector operand_shape_values; + for (const Shape* operand_shape : operand_shapes) { + operand_shape_values.push_back(*operand_shape); } - return ShapeUtil::MakeTupleShape( - {*operand_shapes[0], *operand_shapes[1]}); + return ShapeUtil::MakeTupleShape(operand_shape_values); } return InvalidArgument("Unexpected number of operands for sort"); } diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc index 864ed43118cd066f6ce14cd808b873f137b8414a..7b65e8c1c9d2bc730c6c8550e9265b69fdde71cf 100644 --- a/tensorflow/compiler/xla/service/shape_inference_test.cc +++ b/tensorflow/compiler/xla/service/shape_inference_test.cc @@ -1618,13 +1618,37 @@ TEST_F(ShapeInferenceTest, BadSort) { auto values = ShapeUtil::MakeShape(F32, {5}); StatusOr statusor = ShapeInference::InferVariadicOpShape(HloOpcode::kSort, {&keys, &values}); - ASSERT_FALSE(statusor.ok()); + EXPECT_FALSE(statusor.ok()); + EXPECT_THAT(statusor.status().error_message(), + HasSubstr("dimensions must match")) + << statusor.status(); +} +TEST_F(ShapeInferenceTest, BadSortValuesMismatch) { + auto keys = ShapeUtil::MakeShape(F32, {4}); + auto values_good = ShapeUtil::MakeShape(F32, {4}); + auto values_bad = ShapeUtil::MakeShape(F32, {5}); + StatusOr statusor = ShapeInference::InferVariadicOpShape( + HloOpcode::kSort, {&keys, &values_good, &values_bad}); + EXPECT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), HasSubstr("dimensions must match")) << statusor.status(); } +TEST_F(ShapeInferenceTest, SortManyValues) { + auto keys = ShapeUtil::MakeShape(F32, {4}); + auto values_s32 = ShapeUtil::MakeShape(S32, {4}); + auto values_u32 = ShapeUtil::MakeShape(U32, {4}); + StatusOr statusor = ShapeInference::InferVariadicOpShape( + HloOpcode::kSort, 
{&keys, &values_s32, &values_u32}); + EXPECT_IS_OK(statusor); + Shape inferred_shape = statusor.ValueOrDie(); + EXPECT_TRUE(ShapeUtil::Compatible( + inferred_shape, + ShapeUtil::MakeTupleShape({keys, values_s32, values_u32}))); +} + class ScatterGatherShapeInferenceTest : public ShapeInferenceTest { protected: const Shape s64_scalar_ = ShapeUtil::MakeShape(S64, {}); diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc index 811ac55e2dc2939293e62f1ebcd2bce266a12133..96f3055c98e0611dfe25517cb490014a6d1f7c76 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc @@ -148,7 +148,7 @@ TuplePointsToAnalysis::Run(const HloModule* module) { Status TuplePointsToAnalysis::Analyze() { per_instruction_.clear(); - per_instruction_.resize(module_->NumUniqueInstructionIds()); + per_instruction_.reserve(module_->instruction_count()); logical_buffer_aliases_.clear(); logical_buffer_aliases_.resize( @@ -756,6 +756,7 @@ bool TuplePointsToAnalysis::CanShareOperandBufferWithUser( } } if (user->opcode() == HloOpcode::kDynamicUpdateSlice || + user->opcode() == HloOpcode::kScatter || user->opcode() == HloOpcode::kWhile) { // We eliminated other users in BufferLiveness::live_range_strictly_before, // so here we just need to check that the use is at operand index 0. diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h index 30c365053c5dac5af3c559f7c92b11d389d7fff8..bcfcb388f95b0bedb35a8c399e804034816867b3 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.h +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.h @@ -23,6 +23,7 @@ limitations under the License. 
#include #include +#include "absl/container/flat_hash_map.h" #include "absl/container/inlined_vector.h" #include "absl/types/span.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" @@ -315,14 +316,23 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault { const PerInstruction* PerInst(const HloInstruction* inst) const { int id = inst->unique_id(); DCHECK_GE(id, 0); - DCHECK_LT(id, per_instruction_.size()); - return &per_instruction_[id]; + auto iter = per_instruction_.find(id); + if (iter == per_instruction_.end()) { + LOG(FATAL) << "Expected per-instruction information to already exist"; + } else { + return iter->second.get(); + } } PerInstruction* PerInst(const HloInstruction* inst) { int id = inst->unique_id(); DCHECK_GE(id, 0); - DCHECK_LT(id, per_instruction_.size()); - return &per_instruction_[id]; + auto iter = per_instruction_.find(id); + if (iter == per_instruction_.end()) { + return per_instruction_.emplace(id, absl::make_unique()) + .first->second.get(); + } else { + return iter->second.get(); + } } std::vector> GetAllUsesOfInstructionAtIndex( @@ -339,7 +349,7 @@ class TuplePointsToAnalysis : public DfsHloVisitorWithDefault { const std::unique_ptr logical_buffer_analysis_; // A map from instruction->unique_id() to - std::vector per_instruction_; + absl::flat_hash_map> per_instruction_; // A map from LogicalBuffer->id() to alias information about that logical // buffer diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc index e9a07b14ed685fa4388aca583395370a60176cca..d9ebebf74ed846aa05326a4df72019ef3e71ad88 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc @@ -1010,6 +1010,44 @@ TEST_F(CanShareOperandBufferWithUserTest, DynamicUpdateSliceCanShare) { points_to_analysis_->CanShareOperandBufferWithUser(starts, {}, dus, {})); } 
+TEST_F(CanShareOperandBufferWithUserTest, ScatterCanShare) { + const char* hlo_text = R"( + HloModule TensorFlowScatterV1 + + update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { + lhs = s32[] parameter(0) + ROOT rhs = s32[] parameter(1) + } + + ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2] parameter(1) + updates = s32[2,3] parameter(2) + ROOT scatter = s32[3,3] scatter(operand, indices, updates), + to_apply=update_s32, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 + } + )"; + TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text)); + computation_ = module_->entry_computation(); + RunAnalysis(); + + HloInstruction* operand_param = computation_->parameter_instruction(0); + HloInstruction* indices_param = computation_->parameter_instruction(1); + HloInstruction* updates_param = computation_->parameter_instruction(2); + HloInstruction* scatter = computation_->root_instruction(); + + EXPECT_TRUE(points_to_analysis_->CanShareOperandBufferWithUser( + operand_param, {}, scatter, {})); + EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser( + indices_param, {}, scatter, {})); + EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser( + updates_param, {}, scatter, {})); +} + TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) { auto builder = HloComputation::Builder(TestName()); @@ -1035,7 +1073,8 @@ TEST_F(CanShareOperandBufferWithUserTest, SortCanShareWithTupleUser) { auto values = builder.AddInstruction( HloInstruction::CreateParameter(1, values_shape, "values")); auto sort = builder.AddInstruction(HloInstruction::CreateSort( - ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, values)); + ShapeUtil::MakeTupleShape({keys_shape, values_shape}), 0, keys, + {values})); BuildModuleAndRunAnalysis(builder.Build()); diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 
d244923532d8963dcc4a7433b8d353ff5dc483f2..7a34c0fb2641db3062337f9abf33b09a817f5bf5 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -461,8 +461,9 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout( return ShapeUtil::IsArray(shape) && ElementsIn(shape) == 0; } -/* static */ bool ShapeUtil::IsScalarF32(const Shape& shape) { - return shape.element_type() == F32 && Rank(shape) == 0; +/* static */ bool ShapeUtil::IsScalarWithElementType( + const Shape& shape, PrimitiveType element_type) { + return IsScalar(shape) && shape.element_type() == element_type; } namespace { @@ -596,7 +597,8 @@ StatusOr ParseShapeStringInternal(absl::string_view* s) { // we convert in to the RE2-consumable type and then consume the corresponding // amount from our string_view type. static LazyRE2 shape_pattern = { - "^(\\w*\\d*)\\[([\\d,]*)\\](?:\\s*(dense|sparse)?\\s*{([\\d,]+)})?"}; + "^(\\w*\\d*)\\[([\\d,\\s]*)\\](?:\\s*(dense|sparse)?\\s*{([\\d,\\s]+)})" + "?"}; tensorflow::RegexpStringPiece s_consumable(s->data(), s->size()); if (RE2::Consume(&s_consumable, *shape_pattern, &element_type_string, &dimensions_string, &format_string, &layout_string)) { @@ -1645,7 +1647,7 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, } std::ostream& operator<<(std::ostream& out, const Shape& shape) { - out << ShapeUtil::HumanString(shape); + out << ShapeUtil::HumanStringWithLayout(shape); return out; } diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index d8bb27beae64bb665c79c2cd7134f613495529cc..51cedce7f0e13e65dfd0e250689e0ecd30f971dc 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -72,7 +72,7 @@ class ShapeIndex { void push_back(int64 value) { indices_.push_back(value); } void pop_back() { indices_.pop_back(); } - // push_front is O(n^2), but shapes don't usually have a ton of dimensions. 
+ // push_front is O(n), but shapes don't usually have a ton of dimensions. void push_front(int64 value) { indices_.insert(indices_.begin(), value); } using container_type = absl::InlinedVector; @@ -312,7 +312,10 @@ class ShapeUtil { static bool IsEffectiveScalar(const Shape& shape) { return IsArray(shape) && TrueRank(shape) == 0; } - static bool IsScalarF32(const Shape& shape); + + // Returns whether "shape" is a scalar (array) with the given element_type. + static bool IsScalarWithElementType(const Shape& shape, + PrimitiveType element_type); // Extracts the size of the shape's dimension at dimension number // GetDimensionNumber(dimension_number). diff --git a/tensorflow/compiler/xla/tests/codegen_test_base.cc b/tensorflow/compiler/xla/tests/codegen_test_base.cc index 022641394f113ef28e7c53058385d77572822213..fbebe0408730f2fb37aa57a0f19291bbaa3826f9 100644 --- a/tensorflow/compiler/xla/tests/codegen_test_base.cc +++ b/tensorflow/compiler/xla/tests/codegen_test_base.cc @@ -32,11 +32,10 @@ StatusOr> CodegenTestBase::CompileToAotCompilationResult( std::unique_ptr hlo_module, const AotCompilationOptions& options) { - std::vector> hlo_modules; - hlo_modules.push_back(std::move(hlo_module)); + auto module_group = absl::make_unique(std::move(hlo_module)); TF_ASSIGN_OR_RETURN( std::vector> results, - backend().compiler()->CompileAheadOfTime(std::move(hlo_modules), + backend().compiler()->CompileAheadOfTime(std::move(module_group), options)); return std::move(results.front()); } diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index b851db14ec048a20947fb8136a31e457d3922f86..3aebf784664dac14ba2ea45c5a229b7b2e4fc39d 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -883,7 +883,7 @@ XLA_TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { // (We run this test on all platforms, because, what the heck.) 
XLA_TEST_F(ConvolutionTest, NoCudnnAlgorithmPicker) { execution_options_.mutable_debug_options()->add_xla_disable_hlo_passes( - "cudnn-convolution-algorithm-picker"); + "cudnn-conv-algorithm-picker"); XlaBuilder builder(TestName()); Shape input_shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2}); diff --git a/tensorflow/compiler/xla/tests/custom_call_test.cc b/tensorflow/compiler/xla/tests/custom_call_test.cc index a693fa35954bcb2d95074c94d0aa3eabc1d5fd62..001490c6a8c568656437465054ee4db40d0d8dee 100644 --- a/tensorflow/compiler/xla/tests/custom_call_test.cc +++ b/tensorflow/compiler/xla/tests/custom_call_test.cc @@ -105,8 +105,7 @@ XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(CustomCallR2F32Reduce)) { LiteralTestUtil::ExpectR0Near(10.0f, result, error_spec_); } -XLA_TEST_F(CustomCallTest, - DISABLED_ON_GPU(CustomCall_UsedInOtherComputations)) { +XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(UsedInOtherComputations)) { auto module = CreateNewModule(); auto b = HloComputation::Builder(TestName()); @@ -130,6 +129,53 @@ XLA_TEST_F(CustomCallTest, Array3D{{{2, 3}, {4, 5}}, {{3, 4}, {5, 6}}}, result); } +XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(InputAndOutputLayoutDiffer)) { + auto module = CreateNewModule(); + auto b = HloComputation::Builder(TestName()); + + auto input = + b.AddInstruction(HloInstruction::CreateParameter(0, r2f32_, "p")); + b.AddInstruction( + HloInstruction::CreateCustomCall(r2f32_, {input}, "Add1ToValues")); + + module->AddEntryComputation(b.Build()); + ForceParameterLayout(module.get(), 0, LayoutUtil::MakeLayout({1, 0})); + ForceResultLayout(module.get(), LayoutUtil::MakeLayout({0, 1})); + + Literal argument = LiteralUtil::CreateR2({{1.f, 2.f}, {3.f, 4.f}}); + + // Note, the expected result is transposed! This is because the input and + // output layouts of the custom call differ and the called function just + // blindly adds one to each element. 
+ Literal result = ExecuteAndTransfer(std::move(module), {&argument}); + LiteralTestUtil::ExpectR2Equal({{2.f, 4.f}, {3.f, 5.f}}, result); +} + +XLA_TEST_F(CustomCallTest, DISABLED_ON_GPU(LayoutConstrained)) { + // The argument and result of the computation are set to different layouts, + // but the custom call is layout constrained to a fixed operand and result + // layout, so the correct result should be produced. + auto module = CreateNewModule(); + auto b = HloComputation::Builder(TestName()); + + auto input = + b.AddInstruction(HloInstruction::CreateParameter(0, r2f32_, "p")); + + const Shape& r2f32_dim0_major = + ShapeUtil::MakeShapeWithLayout(F32, {2, 2}, {1, 0}); + b.AddInstruction(HloInstruction::CreateCustomCall( + r2f32_dim0_major, {input}, "Add1ToValues", {r2f32_dim0_major})); + + module->AddEntryComputation(b.Build()); + ForceParameterLayout(module.get(), 0, LayoutUtil::MakeLayout({1, 0})); + ForceResultLayout(module.get(), LayoutUtil::MakeLayout({0, 1})); + + Literal argument = LiteralUtil::CreateR2({{1.f, 2.f}, {3.f, 4.f}}); + + Literal result = ExecuteAndTransfer(std::move(module), {&argument}); + LiteralTestUtil::ExpectR2Equal({{2.f, 3.f}, {4.f, 5.f}}, result); +} + class CustomCallClientAPITest : public ClientLibraryTestBase {}; // When using the client API, CustomCall targets can't begin with '$' -- these diff --git a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc index 8d658695576035cdc34a213847460dd80de5f67e..c622b295094e53e63d0ed692d428bc97724c787c 100644 --- a/tensorflow/compiler/xla/tests/llvm_compiler_test.cc +++ b/tensorflow/compiler/xla/tests/llvm_compiler_test.cc @@ -93,15 +93,16 @@ class LLVMCompilerTest : public ::testing::Test { std::unique_ptr hlo_module = CreateNewModule(); hlo_module->AddEntryComputation(builder.Build()); - std::vector> modules; - modules.push_back(hlo_module->Clone()); - modules.push_back(std::move(hlo_module)); + auto module_group = 
absl::make_unique("test_module_group"); + module_group->push_back(hlo_module->Clone()); + module_group->push_back(std::move(hlo_module)); std::vector> executors; executors.push_back({backend_->default_stream_executor()}); executors.push_back({backend_->default_stream_executor()}); - EXPECT_IS_OK(compiler->Compile(std::move(modules), std::move(executors), + EXPECT_IS_OK(compiler->Compile(std::move(module_group), + std::move(executors), /*device_allocator=*/nullptr)); } @@ -150,12 +151,12 @@ TEST_F(GpuCompilerTest, HooksTest) { TestCompilerHooks(&compiler); } -TEST_F(CpuCompilerTest, MultiModuleCompilation) { +TEST_F(CpuCompilerTest, CpuMultiModuleCompilation) { cpu::CpuCompiler compiler; TestMultiModuleCompilation(&compiler); } -TEST_F(GpuCompilerTest, MultModuleCompilation) { +TEST_F(GpuCompilerTest, NVPTXMultiModuleCompilation) { gpu::NVPTXCompiler compiler; TestMultiModuleCompilation(&compiler); } diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc index c25ccafaf83cf1b29095a77eefa357d9af08dc60..22fe4a2670e2e0e1fedc45036a1ceec19f44e42e 100644 --- a/tensorflow/compiler/xla/tests/reduce_window_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -638,6 +638,8 @@ class R4ReduceWindowTest : public ReduceWindowTestBase, /*computation=*/computation, /*window_dimensions=*/param.window_bounds, /*window_strides=*/param.strides, + /*base_dilations=*/{}, + /*window_dilations=*/{}, /*padding=*/padding); CHECK(reducer == kAdd || reducer == kMax); @@ -1158,7 +1160,10 @@ class R2ReduceWindowTest : public ReduceWindowTestBase, /*init_value=*/init_value, /*computation=*/computation, /*window_dimensions=*/param.window_bounds, - /*window_strides=*/param.strides, /*padding=*/padding); + /*window_strides=*/param.strides, + /*base_dilations=*/{}, + /*window_dilations=*/{}, + /*padding=*/padding); auto reduce_func = param.reducer == kAdd ? 
+[](float a, float b) { return a + b; } @@ -1369,7 +1374,10 @@ TEST_P(R1ReduceWindowTest, DoIt) { /*init_value=*/init_value, /*computation=*/computation, /*window_dimensions=*/param.window_bounds, - /*window_strides=*/param.strides, /*padding=*/padding); + /*window_strides=*/param.strides, + /*base_dilations=*/{}, + /*window_dilations=*/{}, + /*padding=*/padding); auto reduce_func = param.reducer == kAdd ? +[](float a, float b) { return a + b; } diff --git a/tensorflow/compiler/xla/tests/scatter_test.cc b/tensorflow/compiler/xla/tests/scatter_test.cc index b21dd56045e1dc11847e213852dea60cd033be7b..7e1f4aa0eb4801876d9bdbac6a4d7f1d09f81ba8 100644 --- a/tensorflow/compiler/xla/tests/scatter_test.cc +++ b/tensorflow/compiler/xla/tests/scatter_test.cc @@ -69,6 +69,37 @@ ENTRY main { RunTest(hlo_text, &operand, &scatter_indices, &updates); } +XLA_TEST_F(ScatterTest, TensorFlowScatterV1_WithFusedAdds) { + const string hlo_text = R"( +HloModule TensorFlowScatterV1 + +update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { + lhs = s32[] parameter(0) + ROOT rhs = s32[] parameter(1) +} + +ENTRY main { + p0 = s32[3,3] parameter(0) + operand = s32[3,3] add(p0, p0) + p1 = s32[2] parameter(1) + indices = s32[2] add(p1, p1) + p2 = s32[2,3] parameter(2) + updates = s32[2,3] add(p2, p2) + ROOT scatter = s32[3,3] scatter(operand, indices, updates), + to_apply=update_s32, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 +} +)"; + Literal operand = + LiteralUtil::CreateR2({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + Literal scatter_indices = LiteralUtil::CreateR1({0, 1}); + Literal updates = LiteralUtil::CreateR2({{10, 20, 30}, {70, 80, 90}}); + RunTest(hlo_text, &operand, &scatter_indices, &updates); +} + XLA_TEST_F(ScatterTest, TensorFlowScatterV2_Update) { const char* hlo_text = R"( HloModule TensorFlowScatterV2 @@ -98,6 +129,37 @@ ENTRY main { RunTest(hlo_text, &operand, &scatter_indices, &updates); } +XLA_TEST_F(ScatterTest, SimpleR4) 
{ + const char* hlo_text = R"( +HloModule SimpleR4 + +add_f32 (lhs: f32[], rhs: f32[]) -> f32[] { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT add = f32[] add(f32[] lhs, f32[] rhs) +} + +ENTRY main { + operand = f32[1,2,2,1] parameter(0) + indices = s32[1,3] parameter(1) + updates = f32[1,2,2,1] parameter(2) + ROOT scatter = f32[1,2,2,1] scatter(operand, indices, updates), + to_apply=add_f32, + update_window_dims={1,2,3}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0, 2, 1}, + index_vector_dim=1 +} +)"; + + Literal operand = + LiteralUtil::CreateR4({{{{0.f}, {0.f}}, {{0.f}, {0.f}}}}); + Literal updates = + LiteralUtil::CreateR4({{{{0.12}, {0.28}}, {{0.018}, {0.42}}}}); + Literal scatter_indices = LiteralUtil::CreateR2({{0, 0, 0}}); + RunTest(hlo_text, &operand, &scatter_indices, &updates); +} + XLA_TEST_F(ScatterTest, TensorFlowScatter_Add) { const string hlo_text = R"( HloModule TensorFlowScatter_Add diff --git a/tensorflow/compiler/xla/tests/test_utils.cc b/tensorflow/compiler/xla/tests/test_utils.cc index 5155f0c652c7c6dbba60c421159494fa28072090..2f18036ff4c5b0bfa28723fb181c33fa6995eb80 100644 --- a/tensorflow/compiler/xla/tests/test_utils.cc +++ b/tensorflow/compiler/xla/tests/test_utils.cc @@ -272,9 +272,11 @@ std::vector FindConstrainedUses( constrained_uses.insert(constrained_uses.end(), converted_uses.begin(), converted_uses.end()); } else if (opcode == HloOpcode::kSort && - instruction->operand_count() == 2 && op_num == 0) { + instruction->operand_count() >= 2 && op_num == 0) { // Operand 0 of sort is the array of keys used for key/value - // (two-operand) kSort instructions. + // (two-operand) kSort instructions. Since sort stability is not + // guaranteed, constrain keys of key-value sort not to have duplicates, + // since otherwise the value order may legitimately differ. 
constrained_uses.push_back(instruction); } } diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc index 8b1b9e151992296b9d022ae1d9d974eadd2074a8..6d5f276e82087cedc356691b0ff08df24cec8d20 100644 --- a/tensorflow/compiler/xla/tests/while_test.cc +++ b/tensorflow/compiler/xla/tests/while_test.cc @@ -48,7 +48,7 @@ class WhileTest : public ClientLibraryTestBase {}; // while (result < 5) { // result = result + 1; // } -TEST_F(WhileTest, WhileWithScalarS32Result) { +XLA_TEST_F(WhileTest, WhileWithScalarS32Result) { auto result_shape = ShapeUtil::MakeShape(S32, {}); // Create a computation for the condition: repeat for 5 iterations. @@ -84,7 +84,7 @@ TEST_F(WhileTest, WhileWithScalarS32Result) { // while (result < 5) { // result = result + 1; // } -TEST_F(WhileTest, WhileWithScalarS64Result) { +XLA_TEST_F(WhileTest, WhileWithScalarS64Result) { auto result_shape = ShapeUtil::MakeShape(S64, {}); // Create a computation for the condition: repeat for 5 iterations. @@ -114,7 +114,7 @@ TEST_F(WhileTest, WhileWithScalarS64Result) { ComputeAndCompareR0(&builder, 5, {}); } -TEST_F(WhileTest, WhileWithScalarResultNonConstInit) { +XLA_TEST_F(WhileTest, WhileWithScalarResultNonConstInit) { auto result_shape = ShapeUtil::MakeShape(S32, {}); auto orig_shape = ShapeUtil::MakeShape(S32, {2}); @@ -147,7 +147,7 @@ TEST_F(WhileTest, WhileWithScalarResultNonConstInit) { ComputeAndCompareR0(&builder, 5, {}); } -TEST_F(WhileTest, WhileWithPredicateResult) { +XLA_TEST_F(WhileTest, WhileWithPredicateResult) { auto result_shape = ShapeUtil::MakeShape(PRED, {}); // Create a computation for the condition: run until condition is true. 
@@ -184,7 +184,7 @@ TEST_F(WhileTest, WhileWithPredicateResult) { // while (result.sum() < 15.5f) { // result = result + vector(0); // } -TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithEmptyVectorResult)) { +XLA_TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithEmptyVectorResult)) { Shape result_shape = ShapeUtil::MakeShape(F32, {0}); // Create a computation for the reduction. @@ -238,7 +238,7 @@ TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithEmptyVectorResult)) { // while (result.sum() < 15.5f) { // result = result + vector(8, 0.125f); // } -TEST_F(WhileTest, WhileWithVectorResult) { +XLA_TEST_F(WhileTest, WhileWithVectorResult) { Shape result_shape = ShapeUtil::MakeShape(F32, {8}); // Create a computation for the reduction. @@ -298,7 +298,7 @@ TEST_F(WhileTest, WhileWithVectorResult) { // result = result + vector(8, 0.125f); // } // tuple = tuple { while } -TEST_F(WhileTest, WhileWithVectorResultIntoTuple) { +XLA_TEST_F(WhileTest, WhileWithVectorResultIntoTuple) { Shape result_shape = ShapeUtil::MakeShape(F32, {8}); // Create a computation for the reduction. 
@@ -353,7 +353,7 @@ TEST_F(WhileTest, WhileWithVectorResultIntoTuple) { ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0.0001)); } -TEST_F(WhileTest, WhileWithPermutationAndTupleResult) { +XLA_TEST_F(WhileTest, WhileWithPermutationAndTupleResult) { std::vector shape_elements = { ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3})}; @@ -407,7 +407,7 @@ TEST_F(WhileTest, WhileWithPermutationAndTupleResult) { ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0.0001)); } -TEST_F(WhileTest, WhileWithPermutationAndVectorResult) { +XLA_TEST_F(WhileTest, WhileWithPermutationAndVectorResult) { std::vector shape_elements = { ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3}), ShapeUtil::MakeShape(F32, {3})}; @@ -465,7 +465,7 @@ TEST_F(WhileTest, WhileWithPermutationAndVectorResult) { // get<0>(result) = get<0>(result) + 1; // get<1>(result) = get<1>(result) + vector(10, 1.0f); // } -TEST_F(WhileTest, WhileWithTupleResult) { +XLA_TEST_F(WhileTest, WhileWithTupleResult) { std::vector shape_elements = {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {10})}; Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements); @@ -514,7 +514,7 @@ TEST_F(WhileTest, WhileWithTupleResult) { ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0.0001)); } -TEST_F(WhileTest, WhileWithPredicateTupleResult) { +XLA_TEST_F(WhileTest, WhileWithPredicateTupleResult) { std::vector shape_elements = {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(PRED, {})}; Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements); @@ -560,7 +560,7 @@ TEST_F(WhileTest, WhileWithPredicateTupleResult) { ComputeAndCompareTuple(&builder, expected, {}, ErrorSpec(0)); } -TEST_F(WhileTest, WhileWithTupleConstantScalarResult) { +XLA_TEST_F(WhileTest, WhileWithTupleConstantScalarResult) { std::vector shape_elements = {ShapeUtil::MakeShape(S32, {}), 
ShapeUtil::MakeShape(S32, {})}; Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements); @@ -619,7 +619,7 @@ TEST_F(WhileTest, WhileWithTupleConstantScalarResult) { // get<1>(w1) = get<1>(w1) + vector(10, 1.0f); // } // result = get<1>(w0) + get<1>(w1) -TEST_F(WhileTest, TwoWhileWithTupleResult) { +XLA_TEST_F(WhileTest, TwoWhileWithTupleResult) { std::vector shape_elements = {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {10})}; Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements); @@ -698,7 +698,7 @@ TEST_F(WhileTest, TwoWhileWithTupleResult) { } // Test while nodes that share the while body computation. -TEST_F(WhileTest, TwoWhileLoopsAndSharedBody) { +XLA_TEST_F(WhileTest, TwoWhileLoopsAndSharedBody) { std::vector shape_elements = {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {10})}; Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements); @@ -763,7 +763,7 @@ TEST_F(WhileTest, TwoWhileLoopsAndSharedBody) { ComputeAndCompareR1(&builder, expected, {}, ErrorSpec(0.0001)); } -TEST_F(WhileTest, WhileLoopsWithSharedBodyAndInit) { +XLA_TEST_F(WhileTest, WhileLoopsWithSharedBodyAndInit) { std::vector shape_elements = {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(F32, {10})}; Shape result_shape = ShapeUtil::MakeTupleShape(shape_elements); @@ -901,7 +901,7 @@ XLA_TEST_F(WhileTest, WhileWithDynamicUpdateSlice) { // Per backend the values generated can be different as the different backends // use different random number generators. // TODO(b/32240857): Extend test to verify outputs. -TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithPrngScalarResult)) { +XLA_TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithPrngScalarResult)) { auto v6s32 = ShapeUtil::MakeShape(S32, {6}); // Create a computation for the condition: repeat for count iterations. 
@@ -947,7 +947,7 @@ TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithPrngScalarResult)) { } } -TEST_F(WhileTest, WhileThatSwapsParameterWithTupleElement) { +XLA_TEST_F(WhileTest, WhileThatSwapsParameterWithTupleElement) { auto element_shape = ShapeUtil::MakeShape(F32, {2}); XlaBuilder outer("outer"); @@ -979,7 +979,7 @@ TEST_F(WhileTest, WhileThatSwapsParameterWithTupleElement) { ErrorSpec(1e-6)); } -TEST_F(WhileTest, WhileThatSwapsParameterWithBroadcast) { +XLA_TEST_F(WhileTest, WhileThatSwapsParameterWithBroadcast) { auto element_shape = ShapeUtil::MakeShape(F32, {2}); XlaBuilder outer("outer"); @@ -1004,7 +1004,7 @@ TEST_F(WhileTest, WhileThatSwapsParameterWithBroadcast) { ErrorSpec(1e-6)); } -TEST_F(WhileTest, WhileThatTurnsScalarParameterToTupleElement) { +XLA_TEST_F(WhileTest, WhileThatTurnsScalarParameterToTupleElement) { auto element_shape = ShapeUtil::MakeShape(F32, {}); XlaBuilder outer("outer"); @@ -1038,7 +1038,7 @@ TEST_F(WhileTest, WhileThatTurnsScalarParameterToTupleElement) { // result[0] = result[0] + 1; // result[1] = result[1] + 1; // } -TEST_F(WhileTest, WhileWithMixedTupleElements) { +XLA_TEST_F(WhileTest, WhileWithMixedTupleElements) { auto result_shape = ShapeUtil::MakeTupleShape( {ShapeUtil::MakeShape(S32, {}), ShapeUtil::MakeShape(S32, {})}); @@ -1146,7 +1146,7 @@ XLA_TEST_F(WhileTest, NestedWhileWithScalarResult) { // while (f(result).get<0>()) { // result = result + 1; // } -TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithCallInsideCondition)) { +XLA_TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithCallInsideCondition)) { auto result_shape = ShapeUtil::MakeShape(S32, {}); // Create a computation for the condition: repeat for 5 iterations. 
@@ -1186,7 +1186,7 @@ TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithCallInsideCondition)) { ComputeAndCompareR0(&builder, 5, {}); } -TEST_F(WhileTest, WhileWithLoopInvariantOperation) { +XLA_TEST_F(WhileTest, WhileWithLoopInvariantOperation) { auto matrix_shape = ShapeUtil::MakeShape(F32, {2, 2}); auto scalar_s32 = ShapeUtil::MakeShape(S32, {}); auto while_shape = ShapeUtil::MakeTupleShape( @@ -1230,7 +1230,7 @@ TEST_F(WhileTest, WhileWithLoopInvariantOperation) { {param_value.get()}, ErrorSpec(4e-5)); } -TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileInfeedCondition)) { +XLA_TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileInfeedCondition)) { auto while_shape = ShapeUtil::MakeShape(S32, {}); XlaComputation condition; diff --git a/tensorflow/compiler/xla/tools/replay_computation.cc b/tensorflow/compiler/xla/tools/replay_computation.cc index 0c41f227b31ebe1f01073785ea2a666093aefdb3..f910e980535c073562473978662f73f4ee4bee79 100644 --- a/tensorflow/compiler/xla/tools/replay_computation.cc +++ b/tensorflow/compiler/xla/tools/replay_computation.cc @@ -83,7 +83,8 @@ std::unique_ptr CompileExecutable(const HloSnapshot& module, LocalClient* client) { XlaComputation computation(module.hlo().hlo_module()); std::vector argument_layouts; - for (const auto& param : computation.proto().program_shape().parameters()) { + for (const auto& param : + computation.proto().host_program_shape().parameters()) { argument_layouts.push_back(¶m); } return client diff --git a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc index 1d4f8d97f2ed8b263878b94b365b7fb5b949b1a2..dc62cf7a6b24e373374b458d2e4722e79500fb93 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_compile_ops.cc @@ -166,10 +166,22 @@ void XRTCompileOp::Compute(OpKernelContext* ctx) { VLOG(1) << "Compiling XLA executable"; return Compile(ctx, computation_proto, program); })); - - Tensor output(DT_INT64, 
TensorShape({})); - output.scalar()() = uid; - ctx->set_output(0, output); + std::unique_ptr entry; + OP_REQUIRES_OK(ctx, cache->Lookup(uid, &entry)); + + Tensor handle_output(DT_INT64, TensorShape({})); + handle_output.scalar()() = uid; + ctx->set_output(0, handle_output); + + xla::LocalExecutable* executable = entry->get().get_executable(); + xla::ProgramShape program_shape = executable->executable() + ->module() + .config() + .entry_computation_layout() + .ComputeProgramShape(); + Tensor program_shape_output(DT_STRING, TensorShape({1})); + program_shape_output.vec()(0) = program_shape.SerializeAsString(); + ctx->set_output(1, program_shape_output); } XRTCompileOp::~XRTCompileOp() = default; diff --git a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc index 257b054f16a49f3e14e1d76746c9fe0ba7fa8658..3a1e03280a362f6048075be606865712efaffb77 100644 --- a/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc +++ b/tensorflow/compiler/xrt/kernels/xrt_execute_op.cc @@ -64,14 +64,6 @@ uint32 GetXLARandomSeed() { return counter.fetch_add(2); } -// Looks up the input `key` in the compilation cache. -Status GetComputationCacheEntry( - XRTCompilationCache* cache, int64 key, - std::unique_ptr* entry) { - TF_RETURN_IF_ERROR(cache->Lookup(key, entry)); - return Status::OK(); -} - // Populates `inputs` with the input tensors to the computation. 
Status GetComputationInputs(OpKernelContext* context, ResourceMgr* rm, bool release_inputs, diff --git a/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc b/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc index 5cfc8711f9f4b4d54016156dd53471cadb34b581..7b3b50c69559f6003a108fdf6a1325dbdbaa80a6 100644 --- a/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc +++ b/tensorflow/compiler/xrt/ops/xrt_compile_ops.cc @@ -23,7 +23,12 @@ namespace tensorflow { REGISTER_OP("XRTCompile") .Input("computation: string") .Output("handle: int64") - .SetShapeFn(tensorflow::shape_inference::ScalarShape) + .Output("program_shape: string") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->Scalar()); + c->set_output(1, c->UnknownShapeOfRank(1)); + return Status::OK(); + }) .Doc( R"( Reads a computation proto, compiles it, and places it in the global compilation diff --git a/tensorflow/compiler/xrt/tests/BUILD b/tensorflow/compiler/xrt/tests/BUILD index b6dcfc4eb96316b5dad95a65b04d0ae69e4485f6..be44a3474acdeb9905c1d21b932fa0dd10b5a212 100644 --- a/tensorflow/compiler/xrt/tests/BUILD +++ b/tensorflow/compiler/xrt/tests/BUILD @@ -29,8 +29,11 @@ cc_library( "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/client:xla_builder", "//tensorflow/compiler/xla/client:xla_computation", + "//tensorflow/compiler/xla/service:platform_util", "//tensorflow/compiler/xrt:xrt_proto", "//tensorflow/compiler/xrt:xrt_server", "//tensorflow/compiler/xrt/cc:xrt_ops", @@ -49,7 +52,10 @@ tf_cc_test( name = "raw_api_test_cpu", size = "medium", srcs = [], - args = ["--xla_test_device=XLA_CPU"], + args = [ + "--xla_test_device=XLA_CPU", + "--xla_platform=CPU", + ], deps = [ ":raw_api_test_lib", "//tensorflow/compiler/jit:xla_cpu_device", @@ -60,7 +66,10 @@ tf_cuda_cc_test( name = 
"raw_api_test_gpu", size = "medium", srcs = [], - args = ["--xla_test_device=XLA_GPU"], + args = [ + "--xla_test_device=XLA_GPU", + "--xla_platform=GPU", + ], tags = tf_cuda_tests_tags(), deps = [ ":raw_api_test_lib", diff --git a/tensorflow/compiler/xrt/tests/raw_api_test.cc b/tensorflow/compiler/xrt/tests/raw_api_test.cc index f590fbf0d9d85e6e8b041f6719ab6a14ec9e2191..ad42148ce398fe5bb4494891bfa42500f904aa3f 100644 --- a/tensorflow/compiler/xrt/tests/raw_api_test.cc +++ b/tensorflow/compiler/xrt/tests/raw_api_test.cc @@ -22,10 +22,13 @@ limitations under the License. #include "tensorflow/cc/framework/ops.h" #include "tensorflow/cc/framework/scope.h" #include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/compiler/xla/client/client_library.h" +#include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/compiler/xla/client/xla_builder.h" #include "tensorflow/compiler/xla/client/xla_computation.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/platform_util.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/compiler/xrt/cc/ops/xrt_compile_ops.h" @@ -43,6 +46,7 @@ namespace tensorflow { namespace { string* xla_test_device_ptr; // initial value set in main() +string* xla_platform_ptr; // initial value set in main() string DeviceFromFlag() { string xla_test_device = *xla_test_device_ptr; @@ -128,6 +132,16 @@ xla::XlaComputation AddAndScale() { return builder.Build().ValueOrDie(); } +xla::XlaComputation AddS64() { + xla::XlaBuilder builder("AddS64"); + auto p0 = xla::Parameter(&builder, 0, xla::ShapeUtil::MakeShape(xla::S64, {}), + "P0"); + auto p1 = xla::Parameter(&builder, 1, xla::ShapeUtil::MakeShape(xla::S64, {}), + "P1"); + xla::Add(p0, p1); + return builder.Build().ValueOrDie(); +} + xla::XlaComputation AddAndTuple() { xla::XlaBuilder builder("AddAndTuple"); auto p0 = 
xla::Parameter(&builder, 0, @@ -145,6 +159,28 @@ void StoreComputationSnapshot(const xla::XlaComputation& computation, *dst = *snapshot; } +xla::ProgramShape XlaCompiledProgramShape( + const xla::XlaComputation& computation, + const xla::ProgramShape& input_program_shape) { + se::Platform* platform = + xla::PlatformUtil::GetPlatform(*xla_platform_ptr).ValueOrDie(); + xla::LocalClient* client = + xla::ClientLibrary::GetOrCreateLocalClient(platform).ValueOrDie(); + xla::ExecutableBuildOptions exec_options; + exec_options.set_result_layout(input_program_shape.result()); + std::vector parameters_shapes; + for (int64 i = 0; i < input_program_shape.parameters_size(); ++i) { + parameters_shapes.push_back(&input_program_shape.parameters(i)); + } + auto local_executable = + client->Compile(computation, parameters_shapes, exec_options) + .ValueOrDie(); + return local_executable->executable() + ->module() + .entry_computation() + ->ComputeProgramShape(); +} + TEST(RawApiTest, ReadAndWriteState) { xrt::XLAAllocation alloc; alloc.set_device_ordinal(0); @@ -338,20 +374,87 @@ TEST(RawApiTest, CompileAndExecute) { auto p1_value = ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString()); auto p1_handle = ops::XRTAllocate(root, p1_value); - auto result = ops::XRTExecute(root, c_handle, e_config, + auto result = ops::XRTExecute(root, c_handle.handle, e_config, {Output(p0_handle), Output(p1_handle)}); auto read_back = ops::XRTReadLiteralAndRelease(root, result); TF_ASSERT_OK(root.status()); ClientSession session(root); std::vector outputs; - TF_EXPECT_OK(session.Run({read_back}, &outputs)); + TF_EXPECT_OK(session.Run({read_back, c_handle.program_shape}, &outputs)); xla::LiteralProto response; EXPECT_TRUE(response.ParseFromString(outputs[0].scalar()())); auto expected = xla::LiteralUtil::CreateR1({27.0f, 21.0f}); EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); + + xla::ProgramShape program_shape; + 
EXPECT_TRUE(program_shape.ParseFromString(outputs[1].vec()(0))); + EXPECT_EQ(program_shape.parameters_size(), 2); +} + +TEST(RawApiTest, CompileWithXlaReturnShapes) { + xla::XlaBuilder builder("XrtXlaShapes"); + auto input_shape = xla::ShapeUtil::MakeShape(xla::BF16, {32, 3, 128, 128}); + auto kernel_shape = xla::ShapeUtil::MakeShape(xla::BF16, {3, 3, 5, 5}); + // Clear layouts to signal XLA we are ready to get whatever are coming out of + // the compilation process. + xla::LayoutUtil::ClearLayout(&input_shape); + xla::LayoutUtil::ClearLayout(&kernel_shape); + auto param_shape = + xla::ShapeUtil::MakeTupleShape({input_shape, kernel_shape}); + auto param = xla::Parameter(&builder, 0, param_shape, "param"); + auto input = xla::GetTupleElement(param, 0); + auto kernel = xla::GetTupleElement(param, 1); + xla::Conv(input, kernel, {1, 1}, xla::Padding::kSame); + TF_ASSERT_OK_AND_ASSIGN(xla::XlaComputation xla_computation, builder.Build()); + + auto result_shape = xla_computation.GetProgramShape().ValueOrDie().result(); + // Clear the result shape layout to tell XLA we are accepting whatever are + // coming out of the compilation process. 
+ xla::LayoutUtil::ClearLayout(&result_shape); + + xrt::XLAComputation c; + auto config = c.mutable_config(); + auto shapes = config->mutable_program_shape(); + *shapes->add_parameters() = param_shape; + *shapes->mutable_result() = result_shape; + StoreComputationSnapshot(xla_computation, c.mutable_hlo_snapshot()); + + Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); + auto computation = + ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString()); + auto c_handle = ops::XRTCompile(root, computation); + auto release = ops::XRTReleaseCompilationHandle(root, c_handle.handle); + TF_ASSERT_OK(root.status()); + + ClientSession session(root); + std::vector outputs; + TF_EXPECT_OK(session.Run(tensorflow::ClientSession::FeedType(), + {c_handle.program_shape}, {release}, &outputs)); + + xla::ProgramShape program_shape; + EXPECT_TRUE(program_shape.ParseFromString(outputs[0].vec()(0))); + EXPECT_EQ(program_shape.parameters_size(), 1); + + VLOG(2) << "Param: " + << xla::ShapeUtil::HumanStringWithLayout(program_shape.parameters(0)); + VLOG(2) << "Result: " + << xla::ShapeUtil::HumanStringWithLayout(program_shape.result()); + + xla::ProgramShape xla_program_shape = + XlaCompiledProgramShape(xla_computation, *shapes); + EXPECT_TRUE(xla::LayoutUtil::Equal( + xla::ShapeUtil::GetSubshape(program_shape.parameters(0), {0}).layout(), + xla::ShapeUtil::GetSubshape(xla_program_shape.parameters(0), {0}) + .layout())); + EXPECT_TRUE(xla::LayoutUtil::Equal( + xla::ShapeUtil::GetSubshape(program_shape.parameters(0), {1}).layout(), + xla::ShapeUtil::GetSubshape(xla_program_shape.parameters(0), {1}) + .layout())); + EXPECT_TRUE(xla::LayoutUtil::Equal(program_shape.result().layout(), + xla_program_shape.result().layout())); } TEST(RawApiTest, CompileAndExecuteZeroArg) { @@ -371,7 +474,7 @@ TEST(RawApiTest, CompileAndExecuteZeroArg) { auto computation = ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString()); auto c_handle = ops::XRTCompile(root, 
computation); - auto result = ops::XRTExecute(root, c_handle, e_config, + auto result = ops::XRTExecute(root, c_handle.handle, e_config, std::initializer_list({})); auto read_back = ops::XRTReadLiteralAndRelease(root, result); TF_ASSERT_OK(root.status()); @@ -420,7 +523,7 @@ TEST(RawApiTest, CompileAndExecuteReturnTuple) { auto p1_value = ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString()); auto p1_handle = ops::XRTAllocate(root, p1_value); - auto result = ops::XRTExecute(root, c_handle, e_config, + auto result = ops::XRTExecute(root, c_handle.handle, e_config, {Output(p0_handle), Output(p1_handle)}); auto read_back = ops::XRTReadLiteralAndRelease(root, result); TF_ASSERT_OK(root.status()); @@ -437,15 +540,93 @@ TEST(RawApiTest, CompileAndExecuteReturnTuple) { EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); } +TEST(RawApiTest, LeakCompilationReference) { + xrt::XLAComputation c; + auto config = c.mutable_config(); + auto shapes = config->mutable_program_shape(); + *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::F32, {2}); + *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::F32, {2}); + *shapes->mutable_result() = xla::ShapeUtil::MakeTupleShape( + {xla::ShapeUtil::MakeShape(xla::F32, {2})}); + StoreComputationSnapshot(AddAndTuple(), c.mutable_hlo_snapshot()); + + Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); + auto computation = + ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString()); + auto c_handle = ops::XRTCompile(root, computation); + TF_ASSERT_OK(root.status()); + + ClientSession session(root); + std::vector outputs; + TF_EXPECT_OK(session.Run({c_handle.handle}, &outputs)); +} + +TEST(RawApiTest, CompileAndExecuteWithS64Argument) { + xrt::XLAAllocation p0; + p0.set_device_ordinal(0); + *p0.mutable_value() = xla::LiteralUtil::CreateR0(11031965).ToProto(); + xrt::XLAAllocation p1; + p1.set_device_ordinal(0); + *p1.mutable_value() = xla::LiteralUtil::CreateR0(4091934).ToProto(); 
+ + xrt::XLAComputation c; + auto config = c.mutable_config(); + auto shapes = config->mutable_program_shape(); + *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::S64, {}); + *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::S64, {}); + *shapes->mutable_result() = xla::ShapeUtil::MakeShape(xla::S64, {}); + StoreComputationSnapshot(AddS64(), c.mutable_hlo_snapshot()); + + xrt::XRTExecutionConfig e; + e.set_release_input_handles(true); + e.set_release_compilation_handle(true); + + Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag()); + auto e_config = + ops::Const(root.WithDevice("/device:CPU:0"), e.SerializeAsString()); + auto computation = + ops::Const(root.WithDevice("/device:CPU:0"), c.SerializeAsString()); + auto c_handle = ops::XRTCompile(root, computation); + auto p0_value = + ops::Const(root.WithDevice("/device:CPU:0"), p0.SerializeAsString()); + auto p0_handle = ops::XRTAllocate(root, p0_value); + auto p1_value = + ops::Const(root.WithDevice("/device:CPU:0"), p1.SerializeAsString()); + auto p1_handle = ops::XRTAllocate(root, p1_value); + auto result = ops::XRTExecute(root, c_handle.handle, e_config, + {Output(p0_handle), Output(p1_handle)}); + auto read_back = ops::XRTReadLiteralAndRelease(root, result); + TF_ASSERT_OK(root.status()); + + ClientSession session(root); + std::vector outputs; + TF_EXPECT_OK(session.Run({read_back, c_handle.program_shape}, &outputs)); + + xla::LiteralProto response; + EXPECT_TRUE(response.ParseFromString(outputs[0].scalar()())); + + auto expected = xla::LiteralUtil::CreateR0(15123899); + EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response)); + + xla::ProgramShape program_shape; + EXPECT_TRUE(program_shape.ParseFromString(outputs[1].vec()(0))); + EXPECT_EQ(program_shape.parameters_size(), 2); + EXPECT_TRUE( + xla::ShapeUtil::HasPrimitiveType(program_shape.result(), xla::S64)); +} + } // namespace } // namespace tensorflow int main(int argc, char** argv) { tensorflow::xla_test_device_ptr = new 
tensorflow::string("XLA_CPU"); + tensorflow::xla_platform_ptr = new tensorflow::string("CPU"); std::vector flag_list = { tensorflow::Flag("xla_test_device", tensorflow::xla_test_device_ptr, "Tensorflow device type to use for test, e.g., XLA_CPU"), + tensorflow::Flag("xla_platform", tensorflow::xla_platform_ptr, + "The XLA platform to select for the device"), }; tensorflow::string usage = tensorflow::Flags::Usage(argv[0], flag_list); const bool parse_result = tensorflow::Flags::Parse(&argc, argv, flag_list); diff --git a/tensorflow/compiler/xrt/xrt_compilation_cache.cc b/tensorflow/compiler/xrt/xrt_compilation_cache.cc index 4844c7fb7106862dd42b3b3d07245350c9d2383c..d1405eae468492748ae88d842334a922dce272c6 100644 --- a/tensorflow/compiler/xrt/xrt_compilation_cache.cc +++ b/tensorflow/compiler/xrt/xrt_compilation_cache.cc @@ -18,9 +18,19 @@ limitations under the License. #include "absl/synchronization/mutex.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/random/random.h" namespace tensorflow { +namespace { + +int64 get_uid() { + uint64 unsigned_rand = random::New64() & INT64_MAX; + return static_cast(unsigned_rand); +} + +} // namespace + const char* kXRTCompilationCacheResourceName = "xrt_compilation_cache"; XRTCompilationCache::EntryRefImpl::EntryRefImpl(XRTCompilationCache* parent, @@ -46,12 +56,17 @@ XRTCompilationCache::XRTCompilationCache(int max_number_of_entries) XRTCompilationCache::~XRTCompilationCache() { VLOG(1) << "XRTCompilationCache::~XRTCompilationCache()"; + // A buggy client may be holding onto a reference, or a client might have + // crashed while holding onto a reference. In either case, discard all + // outstanding client references to avoid leaking storage. 
+ for (const auto& entry : entries_by_uid_) { + while (!entry.second->RefCountIsOne()) { + entry.second->Unref(); + } + } while (!entries_by_last_use_.empty()) { MarkOldestEntryForEviction(); } - // By the time the cache is deleted all reference holders should have already - // been deleted, since they were holding references to the cache. So all - // entries should be gone at this point. CHECK_EQ(cache_.size(), 0); CHECK_EQ(entries_by_uid_.size(), 0); CHECK_EQ(cache_entries_, 0); @@ -148,7 +163,7 @@ XRTCompilationCache::CompiledSubgraph* XRTCompilationCache::InitializeEntry( CompiledSubgraph* entry = new CompiledSubgraph(); entry->parent = this; entry->key = key; - entry->uid = next_uid_++; + entry->uid = get_uid(); // Add the entry to the cache. Once the computation has been compiled, // UpdateEntryAfterCompilation will be called to potentially mark old entries // that don't fit any more for eviction. diff --git a/tensorflow/compiler/xrt/xrt_compilation_cache.h b/tensorflow/compiler/xrt/xrt_compilation_cache.h index c505299a454506e2136e36fb26833c28ed0d47bc..c43d0fc47873abdc82ee937c155bebc346a05f17 100644 --- a/tensorflow/compiler/xrt/xrt_compilation_cache.h +++ b/tensorflow/compiler/xrt/xrt_compilation_cache.h @@ -211,8 +211,6 @@ class XRTCompilationCache : public ResourceBase { const int max_cache_entries_; mutable absl::Mutex mu_; - // The uid to assign to the next new entry created. - int64 next_uid_ GUARDED_BY(mu_) = 0; // The total number of entries that are stored and not marked for eviction. int cache_entries_ GUARDED_BY(mu_) = 0; // The total number of entries that are marked for eviction. diff --git a/tensorflow/compiler/xrt/xrt_state.cc b/tensorflow/compiler/xrt/xrt_state.cc index d05a1e7dcbff440e0daf03bd25535c26d82b6a0b..3a99820d7aa9e9546cc95385fd98c05f28988e9e 100644 --- a/tensorflow/compiler/xrt/xrt_state.cc +++ b/tensorflow/compiler/xrt/xrt_state.cc @@ -33,6 +33,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/framework/resource_mgr.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/random/random.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/stream_executor/stream_executor.h" @@ -42,12 +43,9 @@ namespace { const char* kTupleContainer = "tuples"; -// Counter used to assign unique handles. -mutex _uid_mutex(tensorflow::LINKER_INITIALIZED); -int64 _uid GUARDED_BY(_uid_mutex) = 0; int64 get_uid() { - mutex_lock l(_uid_mutex); - return _uid++; + uint64 unsigned_rand = random::New64() & INT64_MAX; + return static_cast(unsigned_rand); } Status AllocateScopedShapedBuffer( @@ -67,6 +65,9 @@ Status AllocateScopedShapedBuffer( // requests the host-shape sub-buffer at index i, that will correspond to the // right device-shape sub-buffer at the same index. xla::Shape on_device_shape = transfer_manager->HostShapeToDeviceShape(shape); + VLOG(3) << "Allocating literal buffer: host_shape=" + << xla::ShapeUtil::HumanStringWithLayout(shape) << " device_shape=" + << xla::ShapeUtil::HumanStringWithLayout(on_device_shape); // The ScopedShapedBuffer frees the buffers that have so far been allocated if // it goes out of scope. 
That's useful if we return early as the result of an diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index fa06d351d4e64bfc2fc5e64c81c810185600000a..78ad19a4ab112be08569c857c8ed4e16ceed6d80 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -113,22 +113,52 @@ py_library( "//tensorflow/python:util", "//tensorflow/python/estimator:estimator_py", ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + select({ + "//tensorflow:android": [], + "//tensorflow:ios": [], "//tensorflow:linux_s390x": [], "//tensorflow:windows": [], + "//tensorflow:no_kafka_support": [], "//conditions:default": [ - "//tensorflow/contrib/bigtable", - "//tensorflow/contrib/cloud:cloud_py", - "//tensorflow/contrib/fused_conv:fused_conv_py", # unresolved symbols, need to export more symbols "//tensorflow/contrib/kafka", + ], + }) + select({ + "//tensorflow:android": [], + "//tensorflow:ios": [], + "//tensorflow:linux_s390x": [], + "//tensorflow:windows": [], + "//tensorflow:no_aws_support": [], + "//conditions:default": [ "//tensorflow/contrib/kinesis", + ], + }) + select({ + "//tensorflow:android": [], + "//tensorflow:ios": [], + "//tensorflow:linux_s390x": [], + "//tensorflow:windows": [], + "//conditions:default": [ + "//tensorflow/contrib/fused_conv:fused_conv_py", "//tensorflow/contrib/tensorrt:init_py", "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", ], }) + select({ - "//tensorflow:with_ignite_support": [ + "//tensorflow:android": [], + "//tensorflow:ios": [], + "//tensorflow:linux_s390x": [], + "//tensorflow:windows": [], + "//tensorflow:no_gcp_support": [], + "//conditions:default": [ + "//tensorflow/contrib/bigtable", + "//tensorflow/contrib/cloud:cloud_py", + ], + }) + select({ + "//tensorflow:android": [], + "//tensorflow:ios": [], + "//tensorflow:linux_s390x": [], + "//tensorflow:windows": [], + "//tensorflow:no_ignite_support": [], + "//conditions:default": [ "//tensorflow/contrib/ignite", ], - "//conditions:default": [], }), ) @@ 
-152,14 +182,26 @@ cc_library( ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_cuda([ "//tensorflow/contrib/nccl:nccl_kernels", ]) + select({ + "//tensorflow:android": [], + "//tensorflow:ios": [], "//tensorflow:linux_s390x": [], "//tensorflow:windows": [], + "//tensorflow:no_kafka_support": [], "//conditions:default": [ "//tensorflow/contrib/kafka:dataset_kernels", + ], + }) + select({ + "//tensorflow:android": [], + "//tensorflow:ios": [], + "//tensorflow:linux_s390x": [], + "//tensorflow:windows": [], + "//tensorflow:no_aws_support": [], + "//conditions:default": [ "//tensorflow/contrib/kinesis:dataset_kernels", - "//tensorflow/contrib/tensorrt:trt_engine_op_kernel", ], - }), + }) + if_not_windows([ + "//tensorflow/contrib/tensorrt:trt_engine_op_kernel", + ]), ) cc_library( @@ -183,17 +225,33 @@ cc_library( "//tensorflow/contrib/text:all_ops", "//tensorflow/contrib/tpu:all_ops", ] + select({ + "//tensorflow:android": [], + "//tensorflow:ios": [], "//tensorflow:linux_s390x": [], "//tensorflow:windows": [], + "//tensorflow:no_kafka_support": [], "//conditions:default": [ "//tensorflow/contrib/kafka:dataset_ops_op_lib", - "//tensorflow/contrib/kinesis:dataset_ops_op_lib", - "//tensorflow/contrib/tensorrt:trt_engine_op_op_lib", ], }) + select({ - "//tensorflow:with_ignite_support": [ + "//tensorflow:android": [], + "//tensorflow:ios": [], + "//tensorflow:linux_s390x": [], + "//tensorflow:windows": [], + "//tensorflow:no_aws_support": [], + "//conditions:default": [ + "//tensorflow/contrib/kinesis:dataset_ops_op_lib", + ], + }) + if_not_windows([ + "//tensorflow/contrib/tensorrt:trt_engine_op_op_lib", + ]) + select({ + "//tensorflow:android": [], + "//tensorflow:ios": [], + "//tensorflow:linux_s390x": [], + "//tensorflow:windows": [], + "//tensorflow:no_ignite_support": [], + "//conditions:default": [ "//tensorflow/contrib/ignite:dataset_ops_op_lib", ], - "//conditions:default": [], }), ) diff --git a/tensorflow/contrib/__init__.py 
b/tensorflow/contrib/__init__.py index e71b0e0ae33f9c2dd48643e557447372bc67b3e3..f52a1a7babceeae93cdd2e5a93dad413a1d30191 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -21,14 +21,6 @@ from __future__ import print_function import os -from tensorflow.python.tools import component_api_helper -component_api_helper.package_hook( - parent_package_str=( - "tensorflow.contrib"), - child_package_str=( - "tensorflow_estimator.contrib.estimator")) -del component_api_helper - # Add projects here, they will show up under tf.contrib. from tensorflow.contrib import autograph from tensorflow.contrib import batching diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py index 48f12a64f94c7bd0531488ef537b199558e17e3e..a3df272e6924792128fc38fd153b9527b58b486e 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py @@ -196,6 +196,10 @@ def convert_to_universal_format(dtec, sorted_feature_names, matching_id = categorical_test.value.add() matching_id.int64_value = split.feature_id node.custom_left_child_test.Pack(categorical_test) + elif (node_type == "oblivious_dense_float_binary_split" or + node_type == "oblivious_categorical_id_binary_split"): + raise ValueError("Universal tree format doesn't support oblivious " + "trees") else: raise ValueError("Unexpected node type %s" % node_type) node.left_child_id.value = split.left_id @@ -229,6 +233,13 @@ def _get_feature_importances(dtec, feature_names, num_dense_floats, split = tree_node.categorical_id_binary_split split_column = feature_names[split.feature_column + num_dense_floats + num_sparse_float] + elif node_type == "oblivious_dense_float_binary_split": + split = tree_node.oblivious_dense_float_binary_split + split_column = feature_names[split.feature_column] + elif node_type == 
"oblivious_categorical_id_binary_split": + split = tree_node.oblivious_categorical_id_binary_split + split_column = feature_names[split.feature_column + num_dense_floats + + num_sparse_float] elif node_type == "categorical_id_set_membership_binary_split": split = tree_node.categorical_id_set_membership_binary_split split_column = feature_names[split.feature_column + num_dense_floats + diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py index 194a5c8754cb0ab2db299e3fb5c998c0f27f8435..ca73e4af2fbd0a383d02fa7111f59161701661df 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator.py @@ -28,7 +28,6 @@ import six from tensorflow.contrib import layers from tensorflow.contrib.boosted_trees.estimator_batch import model from tensorflow.contrib.boosted_trees.estimator_batch import distillation_loss -from tensorflow.contrib.boosted_trees.estimator_batch import estimator_utils from tensorflow.contrib.boosted_trees.estimator_batch import trainer_hooks from tensorflow.contrib.boosted_trees.python.ops import model_ops from tensorflow.contrib.boosted_trees.python.training.functions import gbdt_batch @@ -170,6 +169,7 @@ def _dnn_tree_combined_model_fn( if (output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC and not use_core_versions): raise ValueError("You must use core versions with Estimator Spec") + global_step = training_util.get_global_step() with variable_scope.variable_scope( dnn_parent_scope, @@ -191,46 +191,58 @@ def _dnn_tree_combined_model_fn( feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=input_layer_scope) - previous_layer = input_layer - for layer_id, num_hidden_units in enumerate(dnn_hidden_units): + def dnn_logits_fn(): + """Builds the logits from the input layer.""" + previous_layer = 
input_layer + for layer_id, num_hidden_units in enumerate(dnn_hidden_units): + with variable_scope.variable_scope( + "hiddenlayer_%d" % layer_id, + values=(previous_layer,)) as hidden_layer_scope: + net = layers.fully_connected( + previous_layer, + num_hidden_units, + activation_fn=dnn_activation_fn, + variables_collections=[dnn_parent_scope], + scope=hidden_layer_scope) + if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: + net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout)) + _add_hidden_layer_summary(net, hidden_layer_scope.name) + previous_layer = net with variable_scope.variable_scope( - "hiddenlayer_%d" % layer_id, - values=(previous_layer,)) as hidden_layer_scope: - net = layers.fully_connected( + "logits", values=(previous_layer,)) as logits_scope: + dnn_logits = layers.fully_connected( previous_layer, - num_hidden_units, - activation_fn=dnn_activation_fn, + head.logits_dimension, + activation_fn=None, variables_collections=[dnn_parent_scope], - scope=hidden_layer_scope) - if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: - net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout)) - _add_hidden_layer_summary(net, hidden_layer_scope.name) - previous_layer = net - with variable_scope.variable_scope( - "logits", values=(previous_layer,)) as logits_scope: - dnn_logits = layers.fully_connected( - previous_layer, - head.logits_dimension, - activation_fn=None, - variables_collections=[dnn_parent_scope], - scope=logits_scope) - _add_hidden_layer_summary(dnn_logits, logits_scope.name) - - def _dnn_train_op_fn(loss): - """Returns the op to optimize the loss.""" - return optimizers.optimize_loss( - loss=loss, - global_step=training_util.get_global_step(), - learning_rate=_DNN_LEARNING_RATE, - optimizer=_get_optimizer(dnn_optimizer), - name=dnn_parent_scope, - variables=ops.get_collection( - ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope), - # Empty summaries to prevent optimizers from logging training_loss. 
- summaries=[]) + scope=logits_scope) + _add_hidden_layer_summary(dnn_logits, logits_scope.name) + return dnn_logits + if predict_with_tree_only and mode == model_fn.ModeKeys.INFER: + dnn_logits = array_ops.constant(0.0) + dnn_train_op_fn = control_flow_ops.no_op + elif predict_with_tree_only and mode == model_fn.ModeKeys.EVAL: + dnn_logits = control_flow_ops.cond( + global_step > dnn_steps_to_train, + lambda: array_ops.constant(0.0), + dnn_logits_fn) + dnn_train_op_fn = control_flow_ops.no_op + else: + dnn_logits = dnn_logits_fn() + def dnn_train_op_fn(loss): + """Returns the op to optimize the loss.""" + return optimizers.optimize_loss( + loss=loss, + global_step=training_util.get_global_step(), + learning_rate=_DNN_LEARNING_RATE, + optimizer=_get_optimizer(dnn_optimizer), + name=dnn_parent_scope, + variables=ops.get_collection( + ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope), + # Empty summaries to prevent optimizers from logging training_loss. + summaries=[]) # Build Tree Logits. 
- global_step = training_util.get_global_step() with ops.device(global_step.device): ensemble_handle = model_ops.tree_ensemble_variable( stamp_token=0, @@ -261,8 +273,13 @@ def _dnn_tree_combined_model_fn( """Returns the op to optimize the loss.""" if dnn_to_tree_distillation_param: loss_weight, loss_fn = dnn_to_tree_distillation_param - weight_tensor = head_lib._weight_tensor( # pylint: disable=protected-access - features, head.weight_column_name) + # pylint: disable=protected-access + if use_core_versions: + weight_tensor = head_lib._weight_tensor(features, head._weight_column) + else: + weight_tensor = head_lib._weight_tensor( + features, head.weight_column_name) + # pylint: enable=protected-access dnn_logits_fixed = array_ops.stop_gradient(dnn_logits) if loss_fn is None: @@ -305,52 +322,26 @@ def _dnn_tree_combined_model_fn( finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor() if output_type == model.ModelBuilderOutputType.MODEL_FN_OPS: - if use_core_versions: - model_fn_ops = head.create_estimator_spec( - features=features, - mode=mode, - labels=labels, - train_op_fn=_no_train_op_fn, - logits=tree_train_logits) - dnn_train_op = head.create_estimator_spec( - features=features, - mode=mode, - labels=labels, - train_op_fn=_dnn_train_op_fn, - logits=dnn_logits) - dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops( - dnn_train_op).train_op - - tree_train_op = head.create_estimator_spec( - features=tree_features, - mode=mode, - labels=labels, - train_op_fn=_tree_train_op_fn, - logits=tree_train_logits) - tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops( - tree_train_op).train_op - - model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops( - model_fn_ops) - else: - model_fn_ops = head.create_model_fn_ops( - features=features, - mode=mode, - labels=labels, - train_op_fn=_no_train_op_fn, - logits=tree_train_logits) - dnn_train_op = head.create_model_fn_ops( - features=features, - mode=mode, - labels=labels, - 
train_op_fn=_dnn_train_op_fn, - logits=dnn_logits).train_op - tree_train_op = head.create_model_fn_ops( - features=tree_features, - mode=mode, - labels=labels, - train_op_fn=_tree_train_op_fn, - logits=tree_train_logits).train_op + model_fn_ops = head.create_model_fn_ops( + features=features, + mode=mode, + labels=labels, + train_op_fn=_no_train_op_fn, + logits=tree_train_logits) + if mode != model_fn.ModeKeys.TRAIN: + return model_fn_ops + dnn_train_op = head.create_model_fn_ops( + features=features, + mode=mode, + labels=labels, + train_op_fn=dnn_train_op_fn, + logits=dnn_logits).train_op + tree_train_op = head.create_model_fn_ops( + features=tree_features, + mode=mode, + labels=labels, + train_op_fn=_tree_train_op_fn, + logits=tree_train_logits).train_op # Add the hooks model_fn_ops.training_hooks.extend([ @@ -369,11 +360,13 @@ def _dnn_tree_combined_model_fn( labels=labels, train_op_fn=_no_train_op_fn, logits=tree_train_logits) + if mode != model_fn.ModeKeys.TRAIN: + return fusion_spec dnn_spec = head.create_estimator_spec( features=features, mode=mode, labels=labels, - train_op_fn=_dnn_train_op_fn, + train_op_fn=dnn_train_op_fn, logits=dnn_logits) tree_spec = head.create_estimator_spec( features=tree_features, diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py index 839eedd3a87ccaa1faecd1966fe5907d682cac02..dea19b7c62649679f944809b44c51ba0cd361904 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py @@ -18,13 +18,17 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import tempfile from tensorflow.contrib.boosted_trees.estimator_batch import dnn_tree_combined_estimator as estimator from tensorflow.contrib.boosted_trees.proto 
import learner_pb2 from tensorflow.contrib.layers.python.layers import feature_column from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils from tensorflow.contrib.learn.python.learn.estimators import run_config +from tensorflow.python.estimator import exporter from tensorflow.python.estimator.canned import head as head_lib +from tensorflow.python.estimator.export import export +from tensorflow.python.ops import parsing_ops from tensorflow.python.feature_column import feature_column_lib as core_feature_column from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes @@ -34,6 +38,7 @@ from tensorflow.python.ops.losses import losses from tensorflow.python.platform import googletest from tensorflow.python.training import checkpoint_utils + def _train_input_fn(): features = { "x": constant_op.constant([[2.], [1.], [1.]]) @@ -103,35 +108,6 @@ class DNNBoostedTreeCombinedTest(test_util.TensorFlowTestCase): classifier.fit(input_fn=_train_input_fn, steps=15) classifier.evaluate(input_fn=_eval_input_fn, steps=1) - def testFitAndEvaluateDontThrowExceptionWithCore(self): - learner_config = learner_pb2.LearnerConfig() - learner_config.num_classes = 2 - learner_config.constraints.max_tree_depth = 1 - model_dir = tempfile.mkdtemp() - config = run_config.RunConfig() - - # Use core head - head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( - loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE) - - classifier = estimator.DNNBoostedTreeCombinedEstimator( - head=head_fn, - dnn_hidden_units=[1], - # Use core feature columns - dnn_feature_columns=[core_feature_column.numeric_column("x")], - tree_learner_config=learner_config, - num_trees=1, - tree_examples_per_layer=3, - model_dir=model_dir, - config=config, - dnn_steps_to_train=10, - dnn_input_layer_to_tree=True, - tree_feature_columns=[], - use_core_versions=True) - - classifier.fit(input_fn=_train_input_fn, steps=15) - 
classifier.evaluate(input_fn=_eval_input_fn, steps=1) - def testFitAndEvaluateWithDistillation(self): learner_config = learner_pb2.LearnerConfig() learner_config.num_classes = 2 @@ -223,6 +199,51 @@ class CoreDNNBoostedTreeCombinedTest(test_util.TensorFlowTestCase): self.assertLess(0.5, res["auc"]) est.predict(input_fn=_eval_input_fn) + def testTrainEvaluateWithDnnForInputAndTreeForPredict(self): + head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss( + loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS) + + learner_config = learner_pb2.LearnerConfig() + learner_config.num_classes = 2 + learner_config.constraints.max_tree_depth = 3 + model_dir = tempfile.mkdtemp() + config = run_config.RunConfig() + + est = estimator.CoreDNNBoostedTreeCombinedEstimator( + head=head_fn, + dnn_hidden_units=[1], + dnn_feature_columns=[core_feature_column.numeric_column("x")], + tree_learner_config=learner_config, + num_trees=1, + tree_examples_per_layer=3, + model_dir=model_dir, + config=config, + dnn_steps_to_train=10, + dnn_input_layer_to_tree=True, + predict_with_tree_only=True, + dnn_to_tree_distillation_param=(0.5, None), + tree_feature_columns=[]) + + # Train for a few steps. 
+ est.train(input_fn=_train_input_fn, steps=1000) + res = est.evaluate(input_fn=_eval_input_fn, steps=1) + self.assertLess(0.5, res["auc"]) + est.predict(input_fn=_eval_input_fn) + serving_input_fn = ( + export.build_parsing_serving_input_receiver_fn( + feature_spec={"x": parsing_ops.FixedLenFeature( + [1], dtype=dtypes.float32)})) + base_exporter = exporter.FinalExporter( + name="Servo", + serving_input_receiver_fn=serving_input_fn, + assets_extra=None) + export_path = os.path.join(model_dir, "export") + base_exporter.export( + est, + export_path=export_path, + checkpoint_path=None, + eval_result={}, + is_the_final_export=True) if __name__ == "__main__": googletest.main() diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index f675c135f4fc362ea620ea5b04d6b7fd536fceaf..244683765a75626acd932ef8a10d8e5b6639ebb0 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -1,6 +1,16 @@ # Minimum CMake required cmake_minimum_required(VERSION 3.5) +if(WIN32) + if(${CMAKE_VERSION} VERSION_LESS "3.8") + message(WARNING "Your current cmake version is ${CMAKE_VERSION} which does not support setting the toolset architecture to x64. This may cause \"compiler out of heap space\" errors when building. Consider upgrading your cmake to > 3.8 and using the flag -Thost=x64 when running cmake.") + else() + if(NOT CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE OR NOT "${CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE}" STREQUAL "x64") + message(WARNING "Your current cmake generator is set to use 32 bit toolset architecture. This may cause \"compiler out of heap space\" errors when building. 
Consider using the flag -Thost=x64 when running cmake.") + endif() + endif() +endif() + # Project project(tensorflow C CXX) @@ -352,9 +362,7 @@ if (tensorflow_ENABLE_MKL_SUPPORT) list(APPEND tensorflow_EXTERNAL_LIBRARIES ${mkldnn_STATIC_LIBRARIES}) list(APPEND tensorflow_EXTERNAL_DEPENDENCIES mkldnn_copy_shared_to_destination) include_directories(${mkldnn_INCLUDE_DIRS}) - else (tensorflow_ENABLE_MKLDNN_SUPPORT) - add_definitions(-DINTEL_MKL_ML_ONLY) - endif() + endif(tensorflow_ENABLE_MKLDNN_SUPPORT) endif (tensorflow_ENABLE_MKL_SUPPORT) if (tensorflow_ENABLE_GPU) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index 77242b34fd8302cb9104c50a83d4141607911e7f..84c679162c3ed8ffc9babcd3af583b26fb62c2d6 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -108,180 +108,177 @@ ops or APIs. Step-by-step Windows build ========================== -1. Install the prerequisites detailed above, and set up your environment. - - * The following commands assume that you are using the Windows Command - Prompt (`cmd.exe`). You will need to set up your environment to use the - appropriate toolchain, i.e. the 64-bit tools. (Some of the binary targets - we will build are too large for the 32-bit tools, and they will fail with - out-of-memory errors.) The typical command to do set up your - environment is: - - ``` - D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat" - ``` - - * When building with GPU support after installing the CUDNN zip file from NVidia, append its - bin directory to your PATH environment variable. - In case TensorFlow fails to find the CUDA dll's during initialization, check your PATH environment variable. - It should contain the directory of the CUDA dlls and the directory of the CUDNN dll. 
- For example: - - ``` - D:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin - D:\local\cuda\bin - ``` - - * When building with MKL support after installing [MKL](https://software.intel.com/en-us/mkl) from INTEL, append its bin directories to your PATH environment variable. - - In case TensorFlow fails to find the MKL dll's during initialization, check your PATH environment variable. - It should contain the directory of the MKL dlls. For example: - - ``` - D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\mkl - D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\compiler - D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\tbb\vc_mt - ``` - - - * We assume that `cmake` and `git` are installed and in your `%PATH%`. If - for example `cmake` is not in your path and it is installed in - `C:\Program Files (x86)\CMake\bin\cmake.exe`, you can add this directory - to your `%PATH%` as follows: - - ``` - D:\temp> set PATH="%PATH%;C:\Program Files (x86)\CMake\bin\cmake.exe" - ``` - -2. Clone the TensorFlow repository and create a working directory for your - build: - - ``` - D:\temp> git clone https://github.com/tensorflow/tensorflow.git - D:\temp> cd tensorflow\tensorflow\contrib\cmake - D:\temp\tensorflow\tensorflow\contrib\cmake> mkdir build - D:\temp\tensorflow\tensorflow\contrib\cmake> cd build - D:\temp\tensorflow\tensorflow\contrib\cmake\build> - ``` - -3. Invoke CMake to create Visual Studio solution and project files. - - **N.B.** This assumes that `cmake.exe` is in your `%PATH%` environment - variable. The other paths are for illustrative purposes only, and may - be different on your platform. The `^` character is a line continuation - and must be the last character on each line. - - ``` - D:\...\build> cmake .. -A x64 -DCMAKE_BUILD_TYPE=Release ^ - More? -DSWIG_EXECUTABLE=C:/tools/swigwin-3.0.10/swig.exe ^ - More? 
-DPYTHON_EXECUTABLE=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/python.exe ^ - More? -DPYTHON_LIBRARIES=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/libs/python35.lib - ``` - To build with GPU support add "^" at the end of the last line above following with: - ``` - More? -Dtensorflow_ENABLE_GPU=ON ^ - More? -DCUDNN_HOME="D:\...\cudnn" - ``` - To build with MKL support add "^" at the end of the last line above following with: - - ``` - More? -Dtensorflow_ENABLE_MKL_SUPPORT=ON ^ - More? -DMKL_HOME="D:\...\compilers_and_libraries" - ``` - - To enable SIMD instructions with MSVC, as AVX and SSE, define it as follows: - - ``` - More? -Dtensorflow_WIN_CPU_SIMD_OPTIONS=/arch:AVX - ``` - - Note that the `-DCMAKE_BUILD_TYPE=Release` flag must match the build - configuration that you choose when invoking `msbuild`. The known-good - values are `Release` and `RelWithDebInfo`. The `Debug` build type is - not currently supported, because it relies on a `Debug` library for - Python (`python35d.lib`) that is not distributed by default. - - There are various options that can be specified when generating the - solution and project files: - - * `-DCMAKE_BUILD_TYPE=(Release|RelWithDebInfo)`: Note that the - `CMAKE_BUILD_TYPE` option must match the build configuration that you - choose when invoking MSBuild in step 4. The known-good values are - `Release` and `RelWithDebInfo`. The `Debug` build type is not currently - supported, because it relies on a `Debug` library for Python - (`python35d.lib`) that is not distributed by default. - - * `-Dtensorflow_BUILD_ALL_KERNELS=(ON|OFF)`. Defaults to `ON`. You can - build a small subset of the kernels for a faster build by setting this - option to `OFF`. - - * `-Dtensorflow_BUILD_CC_EXAMPLE=(ON|OFF)`. Defaults to `ON`. Generate - project files for a simple C++ - [example training program](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/tutorials/example_trainer.cc). 
- - * `-Dtensorflow_BUILD_PYTHON_BINDINGS=(ON|OFF)`. Defaults to `ON`. Generate - project files for building a PIP package containing the TensorFlow runtime - and its Python bindings. - - * `-Dtensorflow_ENABLE_GRPC_SUPPORT=(ON|OFF)`. Defaults to `ON`. Include - gRPC support and the distributed client and server code in the TensorFlow - runtime. - - * `-Dtensorflow_ENABLE_SSL_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include - SSL support (for making secure HTTP requests) in the TensorFlow runtime. - This support is incomplete, and will be used for Google Cloud Storage - support. - - * `-Dtensorflow_ENABLE_GPU=(ON|OFF)`. Defaults to `OFF`. Include - GPU support. If GPU is enabled you need to install the CUDA 8.0 Toolkit and CUDNN 5.1. - CMake will expect the location of CUDNN in -DCUDNN_HOME=path_you_unzipped_cudnn. - - * `-Dtensorflow_BUILD_CC_TESTS=(ON|OFF)`. Defaults to `OFF`. This builds cc unit tests. - There are many of them and building will take a few hours. - After cmake, build and execute the tests with - ``` - MSBuild /p:Configuration=RelWithDebInfo ALL_BUILD.vcxproj - ctest -C RelWithDebInfo - ``` - - * `-Dtensorflow_BUILD_PYTHON_TESTS=(ON|OFF)`. Defaults to `OFF`. This enables python kernel tests. - After building the python wheel, you need to install the new wheel before running the tests. - To execute the tests, use - ``` - ctest -C RelWithDebInfo - ``` - - * `-Dtensorflow_BUILD_MORE_PYTHON_TESTS=(ON|OFF)`. Defaults to `OFF`. This enables python tests on - serveral major packages. This option is only valid if this and tensorflow_BUILD_PYTHON_TESTS are both set as `ON`. - After building the python wheel, you need to install the new wheel before running the tests. - To execute the tests, use - ``` - ctest -C RelWithDebInfo - ``` - - * `-Dtensorflow_ENABLE_MKL_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include MKL support. If MKL is enabled you need to install the [Intel Math Kernal Library](https://software.intel.com/en-us/mkl). 
- CMake will expect the location of MKL in -MKL_HOME=path_you_install_mkl. - - * `-Dtensorflow_ENABLE_MKLDNN_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include MKL DNN support. MKL DNN is [Intel(R) Math Kernel Library for Deep Neural Networks (Intel(R) MKL-DNN)](https://github.com/intel/mkl-dnn). You have to add `-Dtensorflow_ENABLE_MKL_SUPPORT=ON` before including MKL DNN support. - - -4. Invoke MSBuild to build TensorFlow. - - To build the C++ example program, which will be created as a `.exe` - executable in the subdirectory `.\Release`: - - ``` - D:\...\build> MSBuild /p:Configuration=Release tf_tutorials_example_trainer.vcxproj - D:\...\build> Release\tf_tutorials_example_trainer.exe - ``` - - To build the PIP package, which will be created as a `.whl` file in the - subdirectory `.\tf_python\dist`: - - ``` - D:\...\build> MSBuild /p:Configuration=Release tf_python_build_pip_package.vcxproj - ``` - +1. Install the prerequisites detailed above, and set up your environment. + + * When building with GPU support after installing the CUDNN zip file from + NVidia, append its bin directory to your PATH environment variable. In + case TensorFlow fails to find the CUDA dll's during initialization, + check your PATH environment variable. It should contain the directory of + the CUDA dlls and the directory of the CUDNN dll. For example: + + ``` + D:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin + D:\local\cuda\bin + ``` + + * When building with MKL support after installing + [MKL](https://software.intel.com/en-us/mkl) from INTEL, append its bin + directories to your PATH environment variable. + + In case TensorFlow fails to find the MKL dll's during initialization, + check your PATH environment variable. It should contain the directory of + the MKL dlls. 
For example: + + ``` + D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\mkl + D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\compiler + D:\Tools\IntelSWTools\compilers_and_libraries\windows\redist\intel64\tbb\vc_mt + ``` + + * We assume that `cmake` and `git` are installed and in your `%PATH%`. If + for example `cmake` is not in your path and it is installed in + `C:\Program Files (x86)\CMake\bin\cmake.exe`, you can add this directory + to your `%PATH%` as follows: + + ``` + D:\temp> set PATH="%PATH%;C:\Program Files (x86)\CMake\bin\cmake.exe" + ``` + +2. Clone the TensorFlow repository and create a working directory for your + build: + + ``` + D:\temp> git clone https://github.com/tensorflow/tensorflow.git + D:\temp> cd tensorflow\tensorflow\contrib\cmake + D:\temp\tensorflow\tensorflow\contrib\cmake> mkdir build + D:\temp\tensorflow\tensorflow\contrib\cmake> cd build + D:\temp\tensorflow\tensorflow\contrib\cmake\build> + ``` + +3. Invoke CMake to create Visual Studio solution and project files. + + **N.B.** This assumes that `cmake.exe` is in your `%PATH%` environment + variable. The other paths are for illustrative purposes only, and may be + different on your platform. The `^` character is a line continuation and + must be the last character on each line. + + ``` + D:\...\build> cmake .. -A x64 -Thost=x64 -DCMAKE_BUILD_TYPE=Release ^ + More? -DSWIG_EXECUTABLE=C:/tools/swigwin-3.0.10/swig.exe ^ + More? -DPYTHON_EXECUTABLE=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/python.exe ^ + More? -DPYTHON_LIBRARIES=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/libs/python35.lib + ``` + + To build with GPU support add "^" at the end of the last line above + following with: `More? -Dtensorflow_ENABLE_GPU=ON ^ More? + -DCUDNN_HOME="D:\...\cudnn"` To build with MKL support add "^" at the end of + the last line above following with: + + ``` + More? -Dtensorflow_ENABLE_MKL_SUPPORT=ON ^ + More? 
-DMKL_HOME="D:\...\compilers_and_libraries" + ``` + + To enable SIMD instructions with MSVC, such as AVX and SSE, define it as follows: + + ``` + More? -Dtensorflow_WIN_CPU_SIMD_OPTIONS=/arch:AVX + ``` + + Note that the `-DCMAKE_BUILD_TYPE=Release` flag must match the build + configuration that you choose when invoking `msbuild`. The known-good values + are `Release` and `RelWithDebInfo`. The `Debug` build type is not currently + supported, because it relies on a `Debug` library for Python + (`python35d.lib`) that is not distributed by default. + + The `-Thost=x64` flag will ensure that the 64 bit compiler and linker are + used when building. Without this flag, MSBuild will use the 32 bit toolchain + which is prone to compile errors such as "compiler out of heap space". + + There are various options that can be specified when generating the solution + and project files: + + * `-DCMAKE_BUILD_TYPE=(Release|RelWithDebInfo)`: Note that the + `CMAKE_BUILD_TYPE` option must match the build configuration that you + choose when invoking MSBuild in step 4. The known-good values are + `Release` and `RelWithDebInfo`. The `Debug` build type is not currently + supported, because it relies on a `Debug` library for Python + (`python35d.lib`) that is not distributed by default. + + * `-Dtensorflow_BUILD_ALL_KERNELS=(ON|OFF)`. Defaults to `ON`. You can + build a small subset of the kernels for a faster build by setting this + option to `OFF`. + + * `-Dtensorflow_BUILD_CC_EXAMPLE=(ON|OFF)`. Defaults to `ON`. Generate + project files for a simple C++ + [example training program](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/tutorials/example_trainer.cc). + + * `-Dtensorflow_BUILD_PYTHON_BINDINGS=(ON|OFF)`. Defaults to `ON`. + Generate project files for building a PIP package containing the + TensorFlow runtime and its Python bindings. + + * `-Dtensorflow_ENABLE_GRPC_SUPPORT=(ON|OFF)`. Defaults to `ON`.
Include + gRPC support and the distributed client and server code in the + TensorFlow runtime. + + * `-Dtensorflow_ENABLE_SSL_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include + SSL support (for making secure HTTP requests) in the TensorFlow runtime. + This support is incomplete, and will be used for Google Cloud Storage + support. + + * `-Dtensorflow_ENABLE_GPU=(ON|OFF)`. Defaults to `OFF`. Include GPU + support. If GPU is enabled you need to install the CUDA 8.0 Toolkit and + CUDNN 5.1. CMake will expect the location of CUDNN in + -DCUDNN_HOME=path_you_unzipped_cudnn. + + * `-Dtensorflow_BUILD_CC_TESTS=(ON|OFF)`. Defaults to `OFF`. This builds + cc unit tests. There are many of them and building will take a few + hours. After cmake, build and execute the tests with `MSBuild + /p:Configuration=RelWithDebInfo ALL_BUILD.vcxproj ctest -C + RelWithDebInfo` + + * `-Dtensorflow_BUILD_PYTHON_TESTS=(ON|OFF)`. Defaults to `OFF`. This + enables python kernel tests. After building the python wheel, you need + to install the new wheel before running the tests. To execute the tests, + use `ctest -C RelWithDebInfo` + + * `-Dtensorflow_BUILD_MORE_PYTHON_TESTS=(ON|OFF)`. Defaults to `OFF`. This + enables python tests on several major packages. This option is only + valid if this and tensorflow_BUILD_PYTHON_TESTS are both set as `ON`. + After building the python wheel, you need to install the new wheel + before running the tests. To execute the tests, use `ctest -C + RelWithDebInfo` + + * `-Dtensorflow_ENABLE_MKL_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include + MKL support. If MKL is enabled you need to install the + [Intel Math Kernel Library](https://software.intel.com/en-us/mkl). CMake + will expect the location of MKL in -DMKL_HOME=path_you_install_mkl. + + * `-Dtensorflow_ENABLE_MKLDNN_SUPPORT=(ON|OFF)`. Defaults to `OFF`. + Include MKL DNN support. MKL DNN is [Intel(R) Math Kernel Library for + Deep Neural Networks (Intel(R) + MKL-DNN)](https://github.com/intel/mkl-dnn).
You have to add + `-Dtensorflow_ENABLE_MKL_SUPPORT=ON` before including MKL DNN support. + +4. Invoke MSBuild to build TensorFlow. + + Set up the path to find MSbuild: `D:\temp> "C:\Program Files (x86)\Microsoft + Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat"` + + To build the C++ example program, which will be created as a `.exe` + executable in the subdirectory `.\Release`: + + ``` + D:\...\build> MSBuild /p:Configuration=Release tf_tutorials_example_trainer.vcxproj + D:\...\build> Release\tf_tutorials_example_trainer.exe + ``` + + To build the PIP package, which will be created as a `.whl` file in the + subdirectory `.\tf_python\dist`: + + ``` + D:\...\build> MSBuild /p:Configuration=Release tf_python_build_pip_package.vcxproj + ``` Linux Continuous Integration build ================================== diff --git a/tensorflow/contrib/compiler/xla.py b/tensorflow/contrib/compiler/xla.py index 873b03580d6f1d9cb25c79cb31989d43cdb8c9a7..f2636e190c25c094dd4ee1370c4728994b1014f5 100644 --- a/tensorflow/contrib/compiler/xla.py +++ b/tensorflow/contrib/compiler/xla.py @@ -272,7 +272,7 @@ def _compile_internal(computation, inputs=None): raise TypeError( 'Supplied computation cannot be called with the specified inputs. You ' 'specified %d inputs: %s, but the computation needs %s' % - (input_arity, str([i.name for i in inputs[0]]), arg_error)) + (input_arity, str([i.name for i in inputs]), arg_error)) cluster_name = ops.get_default_graph().unique_name('cluster') pivot = control_flow_ops.no_op(name=cluster_name + '/pivot') @@ -606,8 +606,8 @@ class _ModelFnWrapper(object): def estimator_model_fn(target_model_fn=None): """estimator_model_fn decorates a model_fn to be compiled for execution. - Currently only it only works with `TPUEstimator`. If you need to use it with - base `Estimator`, please add `tf.enable_resource_variables()` at beginning of + Currently it only works with `TPUEstimator`. 
If you need to use it with base + `Estimator`, please add `tf.enable_resource_variables()` at the beginning of your program. Example 1, decorating model_fn: diff --git a/tensorflow/contrib/copy_graph/python/util/copy_test.py b/tensorflow/contrib/copy_graph/python/util/copy_test.py index ba97c7845635596c3f4f849044b6707ec43f5bbf..4d8651a79fde9b876d4fdd9b050e71d2eb7c893d 100644 --- a/tensorflow/contrib/copy_graph/python/util/copy_test.py +++ b/tensorflow/contrib/copy_graph/python/util/copy_test.py @@ -26,15 +26,16 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test -graph1 = ops.Graph() -graph2 = ops.Graph() - class CopyVariablesTest(test.TestCase): + def setUp(self): + self.graph1 = ops.Graph() + self.graph2 = ops.Graph() + def testVariableCopy(self): - with graph1.as_default(): + with self.graph1.as_default(): #Define a Variable in graph1 some_var = variables.VariableV1(2) #Initialize session @@ -43,13 +44,15 @@ class CopyVariablesTest(test.TestCase): variables.global_variables_initializer().run(session=sess1) #Make a copy of some_var in the defsult scope in graph2 - copy1 = copy_elements.copy_variable_to_graph(some_var, graph2) + copy1 = copy_elements.copy_variable_to_graph(some_var, self.graph2) #Make another copy with different scope - copy2 = copy_elements.copy_variable_to_graph(some_var, graph2, "test_scope") + copy2 = copy_elements.copy_variable_to_graph(some_var, + self.graph2, + "test_scope") #Initialize both the copies - with graph2.as_default(): + with self.graph2.as_default(): #Initialize Session sess2 = session_lib.Session() #Initialize the Variables @@ -67,9 +70,13 @@ class CopyVariablesTest(test.TestCase): class CopyOpsTest(test.TestCase): + def setUp(self): + self.graph1 = ops.Graph() + self.graph2 = ops.Graph() + def testOpsCopy(self): - with graph1.as_default(): + with self.graph1.as_default(): #Initialize a basic expression y = ax + b x = 
array_ops.placeholder("float") a = variables.VariableV1(3.0) @@ -82,21 +89,21 @@ class CopyOpsTest(test.TestCase): variables.global_variables_initializer().run(session=sess1) #First, initialize a as a Variable in graph2 - a1 = copy_elements.copy_variable_to_graph(a, graph2) + a1 = copy_elements.copy_variable_to_graph(a, self.graph2) #Initialize a1 in graph2 - with graph2.as_default(): + with self.graph2.as_default(): #Initialize session sess2 = session_lib.Session() #Initialize the Variable variables.global_variables_initializer().run(session=sess2) #Initialize a copy of y in graph2 - y1 = copy_elements.copy_op_to_graph(y, graph2, [a1]) + y1 = copy_elements.copy_op_to_graph(y, self.graph2, [a1]) #Now that y has been copied, x must be copied too. #Get that instance - x1 = copy_elements.get_copied_op(x, graph2) + x1 = copy_elements.get_copied_op(x, self.graph2) #Compare values of y & y1 for a sample input #and check if they match diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD index 8267612236bcf2946c033d3e5071eee935d2c03a..76d5b59ce17279b7c6d2d930504153fc31deb8e2 100644 --- a/tensorflow/contrib/distribute/python/BUILD +++ b/tensorflow/contrib/distribute/python/BUILD @@ -411,6 +411,24 @@ cuda_py_test( ], ) +cuda_py_test( + name = "moving_averages_test", + srcs = ["moving_averages_test.py"], + additional_deps = [ + ":combinations", + "@absl_py//absl/testing:parameterized", + "//tensorflow/python/eager:test", + "//tensorflow/python:array_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:training", + "//tensorflow/python:variables", + ], + tags = [ + "no_pip", + ], +) + cuda_py_test( name = "optimizer_v2_test", srcs = ["optimizer_v2_test.py"], diff --git a/tensorflow/contrib/distribute/python/examples/keras_mnist.py b/tensorflow/contrib/distribute/python/examples/keras_mnist.py index a84ef041960e389c08246fc8a16df2300856d968..da7f8c548f94972b6ec0a67848e1520386d1e28b 
100644 --- a/tensorflow/contrib/distribute/python/examples/keras_mnist.py +++ b/tensorflow/contrib/distribute/python/examples/keras_mnist.py @@ -113,7 +113,7 @@ def main(_): distribute=strategy) # Train the model with the train dataset. - model.fit(x=train_ds, epochs=20, steps_per_epoch=310) + model.fit(x=train_ds, epochs=20, steps_per_epoch=468) # Evaluate the model with the eval dataset. score = model.evaluate(eval_ds, steps=10, verbose=0) diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py index 3511b7761ff4d8c995bfa40a1098b8e803f2a1b3..dfa38912897359036d5c333b69f54047f52a2f49 100644 --- a/tensorflow/contrib/distribute/python/keras_test.py +++ b/tensorflow/contrib/distribute/python/keras_test.py @@ -367,6 +367,83 @@ class TestDistributionStrategyWithNumpyArrays(test.TestCase, # Verify that the numpy value is copied to the variable. self.assertAllEqual(x, val) + def test_calculating_batch_params(self): + # This verifies that we calculate the number of steps when the batch size + # is specified. + with self.cached_session(): + # 64 is the number of input samples. + inputs = np.zeros((64, 3), dtype=np.float32) + # The number of towers is equal to 3. + strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0', + '/device:CPU:0', + '/device:GPU:1']) + + with self.assertRaisesRegexp(ValueError, 'Please specify a batch_size ' + 'that is smaller than'): + # The batch size(128) is larger than the number of input + # samples(64). + distributed_training_utils.get_input_batch_params(inputs, + 128, + strategy) + + with self.assertRaisesRegexp(ValueError, 'is smaller than the number ' + 'of towers'): + # The batch size(32) * num_towers(3) is 96 which is greater than the + # number of input samples(64). + distributed_training_utils.get_input_batch_params(inputs, + 32, + strategy) + + # The number of towers now is equal to 2. 
+ strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0', + '/device:CPU:0']) + # 32 is the batch size per tower. + steps = distributed_training_utils.get_input_batch_params(inputs, + 32, + strategy) + # The number of batches is the ratio of input samples(64) to + # batch size(32) which is 2. The number of steps(1) is the ratio of + # number of batches(2) to the number of towers(2). + self.assertEqual(steps, 1) + + # 16 is the batch size per tower. + steps = distributed_training_utils.get_input_batch_params(inputs, + 16, + strategy) + # The number of batches is the ratio of input samples(64) to + # batch size(16) which is 4. The number of steps(2) is the ratio of + # number of batches(4) to the number of towers(2). + self.assertEqual(steps, 2) + + def test_calculating_batch_size(self): + with self.cached_session(): + # 64 is the number of input samples. + inputs = np.zeros((64, 3), dtype=np.float32) + targets = np.zeros((64, 4), dtype=np.float32) + + model = get_model() + optimizer = gradient_descent.GradientDescentOptimizer(0.001) + loss = 'mse' + strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0', + '/device:CPU:0']) + strategy._require_static_shapes = True + + model.compile(optimizer, loss, distribute=strategy) + iterator = model._distribution_standardize_user_data(inputs, + targets, + batch_size=None, + check_steps=True, + steps_name='steps', + steps=3) + + # The global batch size(21) across all towers is the ratio of the input + # samples(64) to the steps(3). + # The batch size(10) per device is the ratio of the global batch size(21) + # to the number of towers(2). + # The global batch size and batch size are rounded integer values. 
+ self.assertEqual(10, distributed_training_utils.get_batch_dimension( + iterator._iterator)) + @combinations.generate(strategy_combinations()) def test_calling_model_with_numpy_arrays(self, distribution): with self.cached_session(): @@ -592,33 +669,37 @@ class TestDistributionStrategyWithDatasets(test.TestCase, # meaningful values. Currently we don't pass the learning phase if the # Lambda layer uses the learning phase. with self.cached_session(): - x = keras.layers.Input(shape=(16,), name='input') - y = keras.layers.Dense(16)(x) + x = keras.layers.Input(shape=(1,), name='input') + y = keras.layers.Dense(1, kernel_initializer='ones')(x) z = keras.layers.Dropout(0.9999)(y) model = keras.Model(x, z) + initial_weights = model.get_weights() optimizer = gradient_descent.GradientDescentOptimizer(0.005) loss = 'mse' metrics = ['acc'] - strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0', - '/device:CPU:0']) + strategy = mirrored_strategy.MirroredStrategy( + ['/device:GPU:0', '/device:GPU:1']) model.compile(optimizer, loss, metrics=metrics, distribute=strategy) - inputs = np.random.rand(10, 16) - targets = np.ones((10, 16), dtype=np.float32) + inputs = np.ones((10, 1), dtype=np.float32) + targets = np.ones((10, 1), dtype=np.float32) dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(8) - - hist = model.fit(dataset, epochs=5, steps_per_epoch=20, verbose=1) - self.assertEqual(hist.history['acc'][0], 1) + dataset = dataset.repeat().batch(8) + hist = model.fit(dataset, epochs=1, steps_per_epoch=20, verbose=1) + self.assertAlmostEqual(hist.history['acc'][0], 0, 0) + model.set_weights(initial_weights) evaluate_output = model.evaluate(dataset, steps=20) - self.assertEqual(evaluate_output[1], 0) - - predict_output = model.predict(dataset, steps=1) - self.assertNotEqual(np.mean(predict_output), 0) + self.assertAlmostEqual(evaluate_output[1], 1, 0) + + inputs = np.ones((10, 1), dtype=np.float32) + 
predict_dataset = dataset_ops.Dataset.from_tensor_slices(inputs) + predict_dataset = predict_dataset.repeat().batch(5) + output = model.predict(predict_dataset, steps=10) + ref_output = np.ones((50, 1), dtype=np.float32) + self.assertArrayNear(output[0], ref_output, 1e-1) class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase): diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py index a32424b316b003cc58ccf28fd968acb6a764a542..0f82508428a58fb671cef25c97ca5880ebb38e83 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py @@ -293,7 +293,8 @@ def _create_mirrored_variable(devices, real_mirrored_creator, *args, **kwargs): collections.append(ops.GraphKeys.TRAINABLE_VARIABLES) l = g.get_collection_ref(ops.GraphKeys.TRAINABLE_VARIABLES) for v in index.values(): - l.remove(v) + if v in l: + l.remove(v) g.add_to_collections(collections, result) elif ops.GraphKeys.GLOBAL_STEP in collections: ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, result) @@ -461,16 +462,20 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): # name as the absolute name of the variable. 
kwargs["name"] = "%s/replica_%d/" % (var0name, i) # Initialize replicas with the same value: - if context.executing_eagerly(): - kwargs["initial_value"] = array_ops.identity( - index[devices[0]].value()) - else: - def initial_value_fn(device=d): + def initial_value_fn(device=d): + if context.executing_eagerly(): + init_value = index[devices[0]].value() + return array_ops.identity(init_value) + else: with ops.device(device): - return array_ops.identity(index[devices[0]].initial_value) - kwargs["initial_value"] = initial_value_fn + init_value = index[devices[0]].initial_value + return array_ops.identity(init_value) + kwargs["initial_value"] = initial_value_fn with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT): - v = next_creator(*args, **kwargs) + # Don't record operations (e.g. other variable reads) during + # variable creation. + with tape.stop_recording(): + v = next_creator(*args, **kwargs) assert not isinstance(v, values.DistributedVariable) index[d] = v return index diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py index eeac528329a239f6a8a68a72c44272566b1d83d1..ed36639ce86e891544edb644150c5d31fe610b4f 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy_multigpu_test.py @@ -20,6 +20,8 @@ from __future__ import print_function import sys +import numpy as np + from tensorflow.contrib.distribute.python import mirrored_strategy from tensorflow.contrib.distribute.python import multi_worker_test_base from tensorflow.contrib.distribute.python import strategy_test_lib @@ -34,7 +36,10 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.keras.engine import training as keras_training +from 
tensorflow.python.keras.layers import core as keras_core from tensorflow.python.layers import core +from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import rnn from tensorflow.python.ops import rnn_cell_impl @@ -43,6 +48,8 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.training import device_util from tensorflow.python.training import distribution_strategy_context +from tensorflow.python.training import gradient_descent +from tensorflow.python.training import optimizer as optimizer_lib from tensorflow.python.training import server_lib @@ -1245,6 +1252,22 @@ class MockModel(object): return x +class MiniModel(keras_training.Model): + """Minimal model for mnist. + + Useful for testing and debugging on slow TPU simulators. + """ + + def __init__(self): + super(MiniModel, self).__init__(name="") + self.fc = keras_core.Dense(1, name="fc", kernel_initializer="ones", + bias_initializer="ones") + + def call(self, inputs, training=True): + inputs = array_ops.ones([1, 10]) + return self.fc(inputs) + + class MirroredStrategyDefunTest(test.TestCase): def _skip_eager_if_gpus_less_than(self, num_gpus): @@ -1365,6 +1388,41 @@ class MirroredStrategyDefunTest(test.TestCase): "GPU:0": 3.0 * 1.25}) self._call_and_check(fn1, [factors], expected_result, [fn1]) + @test_util.run_in_graph_and_eager_modes() + def testTrain(self): + self._skip_eager_if_gpus_less_than(1) + + cpu_dev = device_util.canonicalize("CPU:0") + gpu_dev = device_util.canonicalize("GPU:0") + devices = [cpu_dev, gpu_dev] + dist = mirrored_strategy.MirroredStrategy(devices) + + with dist.scope(): + mock_model = MiniModel() + mock_model.call = function.defun(mock_model.call) + + def loss_fn(ctx): + del ctx + return mock_model(array_ops.ones([1, 10])) + + gradients_fn = backprop.implicit_grad(loss_fn) + gradients_fn = optimizer_lib.get_filtered_grad_fn(gradients_fn) + grads_and_vars = 
dist.call_for_each_tower( + gradients_fn, None, run_concurrently=False) + + optimizer = gradient_descent.GradientDescentOptimizer(0.25) + update_ops = optimizer._distributed_apply(dist, grads_and_vars) # pylint: disable=protected-access + + if not context.executing_eagerly(): + self.evaluate(variables.global_variables_initializer()) + self.evaluate(update_ops) + + updated_var_values = self.evaluate(mock_model.variables) + # All variables start at 1.0 and get two updates of 0.25. + self.assertAllEqual(0.5 * np.ones([10, 1]), updated_var_values[0]) + self.assertAllEqual([0.5], updated_var_values[1]) + + class MultiWorkerMirroredStrategyTest( multi_worker_test_base.MultiWorkerTestBase, diff --git a/tensorflow/contrib/distribute/python/moving_averages_test.py b/tensorflow/contrib/distribute/python/moving_averages_test.py new file mode 100644 index 0000000000000000000000000000000000000000..119352ad9195dc51201863f34aef19cb3289e635 --- /dev/null +++ b/tensorflow/contrib/distribute/python/moving_averages_test.py @@ -0,0 +1,141 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for training.moving_averages when using a DistributionStrategy.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +from tensorflow.contrib.distribute.python import combinations +from tensorflow.python.eager import test +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import variables +from tensorflow.python.training import moving_averages + + +all_combinations = combinations.combine( + distribution=[combinations.default_strategy, + combinations.one_device_strategy, + combinations.mirrored_strategy_with_gpu_and_cpu], + mode=["graph"]) + + +class AssignMovingAveragesTest(test.TestCase, parameterized.TestCase): + + @combinations.generate(all_combinations) + def testTowerModeWithoutZeroDebias(self, distribution): + tower_id = [0] + + def tower_fn(): + var = variables.Variable([10.0, 11.0]) + val = constant_op.constant([1.0 + tower_id[0], 2.0 - tower_id[0]]) + tower_id[0] += 1 + decay = 0.25 + assign = moving_averages.assign_moving_average( + var, val, decay, zero_debias=False) + return var, assign + + with distribution.scope(), self.cached_session() as sess: + var, assign = distribution.call_for_each_tower(tower_fn) + variables.global_variables_initializer().run() + self.assertAllClose([10.0, 11.0], var.eval()) + sess.run(distribution.unwrap(assign)) + # Mean of val across calls to tower_fn(). 
+ average_val = [1.0 + 0.5 * (tower_id[0] - 1), + 2.0 - 0.5 * (tower_id[0] - 1)] + val_weight = 1.0 - 0.25 + self.assertAllClose( + [10.0 * 0.25 + average_val[0] * val_weight, + 11.0 * 0.25 + average_val[1] * val_weight], + var.eval()) + + @combinations.generate(all_combinations) + def testTowerMode(self, distribution): + tower_id = [0] + + def tower_fn(): + var = variables.Variable([0.0, 0.0]) + val = constant_op.constant([1.0 + tower_id[0], 2.0 - tower_id[0]]) + tower_id[0] += 1 + decay = 0.25 + assign = moving_averages.assign_moving_average(var, val, decay) + return var, assign.op + + with distribution.scope(), self.cached_session() as sess: + var, assign_op = distribution.call_for_each_tower(tower_fn) + variables.global_variables_initializer().run() + self.assertAllClose([0.0, 0.0], var.eval()) + sess.run(distribution.unwrap(assign_op)) + # Mean of val across calls to tower_fn(). + average_val = [1.0 + 0.5 * (tower_id[0] - 1), + 2.0 - 0.5 * (tower_id[0] - 1)] + self.assertAllClose(average_val, var.eval()) + + @combinations.generate(all_combinations) + def testCrossTowerWithoutZeroDebias(self, distribution): + with distribution.scope(), self.cached_session() as sess: + var = variables.Variable([10.0, 11.0]) + val = constant_op.constant([1.0, 2.0]) + decay = 0.25 + # NOTE(josh11b): We currently generate an error if val is a PerDevice value. + assign = moving_averages.assign_moving_average( + var, val, decay, zero_debias=False) + + variables.global_variables_initializer().run() + self.assertAllClose([10.0, 11.0], var.eval()) + sess.run(assign) + average_val = [1.0, 2.0] + val_weight = 1.0 - 0.25 + self.assertAllClose( + [10.0 * 0.25 + average_val[0] * val_weight, + 11.0 * 0.25 + average_val[1] * val_weight], + var.eval()) + # Also try assign.op. 
+ sess.run(assign.op) + orig_weight = 0.25 * 0.25 + val_weight = 1.0 - orig_weight + self.assertAllClose( + [10.0 * orig_weight + average_val[0] * val_weight, + 11.0 * orig_weight + average_val[1] * val_weight], + var.eval()) + + @combinations.generate(all_combinations) + def testCrossTower(self, distribution): + with distribution.scope(), self.cached_session() as sess: + var = variables.Variable([0.0, 0.0]) + val = array_ops.placeholder(dtypes.float32) + decay = 0.25 + # NOTE(josh11b): We currently generate an error if val is a PerDevice value. + assign = moving_averages.assign_moving_average(var, val, decay) + + variables.global_variables_initializer().run() + self.assertAllClose([0.0, 0.0], var.eval()) + sess.run(assign, feed_dict={val: [1.0, 2.0]}) + self.assertAllClose([1.0, 2.0], var.eval()) + + # Also try assign.op. + sess.run(assign.op, feed_dict={val: [10.0, 0.0]}) + self.assertAllClose( + [(1.0 * 0.25 + 10.0) / (1.0 * 0.25 + 1.0), + (2.0 * 0.25 + 0.0) / (1.0 * 0.25 + 1.0)], + var.eval()) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py index 353d11a5831904abd43828f1d9d4abfc61aede60..9c112e4f851b5e5e6f65c0bd9d9564420f8d4446 100644 --- a/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py +++ b/tensorflow/contrib/distribute/python/parameter_server_strategy_test.py @@ -262,7 +262,9 @@ class ParameterServerStrategyTestBase( h = f + 1.0 self.assertEqual( device_util.canonicalize(u.device), tower_variable_device) - self.assertEqual(device_util.canonicalize(x.device), h.device) + self.assertEqual( + device_util.canonicalize(x.device), + device_util.canonicalize(h.device)) return y_add, z_add, f y, z, f = d.call_for_each_tower(model_fn) diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py index 
0dd78ba185bcbf126c2e1880a94f67a2781f229d..472cb4230c5155369ccf05eef2f82f86f8881bf2 100644 --- a/tensorflow/contrib/distribute/python/values.py +++ b/tensorflow/contrib/distribute/python/values.py @@ -475,6 +475,11 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase): self._aggregation = aggregation # Needed for GradientTape self._trainable = self._primary_var.trainable + # Typically like `DistributedVariable`, a `TPUMirroredVariable`'s + # initializer is composed of the initializers of the components variables. + # However, in some cases, such as when restoring from a checkpoint, we may + # set the _initializer_op property on the entire `TPUMirroredVariable`. + self._initializer_op = None def _get(self, device=None): """Returns the value for the current device or raises a ValueError.""" @@ -704,8 +709,12 @@ class TPUMirroredVariable(checkpointable.CheckpointableBase): @property def initializer(self): - return control_flow_ops.group( - [v.initializer for v in nest.flatten(self._index)]) + if self._initializer_op: + init_op = self._initializer_op + else: + init_op = control_flow_ops.group( + [v.initializer for v in self._index.values()]) + return init_op @property def graph(self): diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py index 5cec93c4df2e970f203253be6342bb292f296eb0..343eae3440e30f7d328cd214c5c2cc8208b310e2 100644 --- a/tensorflow/contrib/distributions/__init__.py +++ b/tensorflow/contrib/distributions/__init__.py @@ -18,69 +18,73 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -# pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member +from tensorflow.python.util import deprecation -from tensorflow.contrib.distributions.python.ops import bijectors -from tensorflow.contrib.distributions.python.ops.autoregressive import * -from tensorflow.contrib.distributions.python.ops.batch_reshape import * -from 
tensorflow.contrib.distributions.python.ops.binomial import * -from tensorflow.contrib.distributions.python.ops.cauchy import * -from tensorflow.contrib.distributions.python.ops.chi2 import * -from tensorflow.contrib.distributions.python.ops.conditional_distribution import * -from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import * -from tensorflow.contrib.distributions.python.ops.deterministic import * -from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular -from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular_inverse -from tensorflow.contrib.distributions.python.ops.distribution_util import matrix_diag_transform -from tensorflow.contrib.distributions.python.ops.distribution_util import reduce_weighted_logsumexp -from tensorflow.contrib.distributions.python.ops.distribution_util import softplus_inverse -from tensorflow.contrib.distributions.python.ops.distribution_util import tridiag -from tensorflow.contrib.distributions.python.ops.estimator import * -from tensorflow.contrib.distributions.python.ops.geometric import * -from tensorflow.contrib.distributions.python.ops.half_normal import * -from tensorflow.contrib.distributions.python.ops.independent import * -from tensorflow.contrib.distributions.python.ops.inverse_gamma import * -from tensorflow.contrib.distributions.python.ops.kumaraswamy import * -from tensorflow.contrib.distributions.python.ops.logistic import * -from tensorflow.contrib.distributions.python.ops.mixture import * -from tensorflow.contrib.distributions.python.ops.mixture_same_family import * -from tensorflow.contrib.distributions.python.ops.moving_stats import * -from tensorflow.contrib.distributions.python.ops.mvn_diag import * -from tensorflow.contrib.distributions.python.ops.mvn_diag_plus_low_rank import * -from tensorflow.contrib.distributions.python.ops.mvn_full_covariance import * -from 
tensorflow.contrib.distributions.python.ops.mvn_tril import * -from tensorflow.contrib.distributions.python.ops.negative_binomial import * -from tensorflow.contrib.distributions.python.ops.normal_conjugate_posteriors import * -from tensorflow.contrib.distributions.python.ops.onehot_categorical import * -from tensorflow.contrib.distributions.python.ops.poisson import * -from tensorflow.contrib.distributions.python.ops.poisson_lognormal import * -from tensorflow.contrib.distributions.python.ops.quantized_distribution import * -from tensorflow.contrib.distributions.python.ops.relaxed_bernoulli import * -from tensorflow.contrib.distributions.python.ops.relaxed_onehot_categorical import * -from tensorflow.contrib.distributions.python.ops.sample_stats import * -from tensorflow.contrib.distributions.python.ops.seed_stream import * -from tensorflow.contrib.distributions.python.ops.sinh_arcsinh import * -from tensorflow.contrib.distributions.python.ops.test_util import * -from tensorflow.contrib.distributions.python.ops.vector_diffeomixture import * -from tensorflow.contrib.distributions.python.ops.vector_exponential_diag import * -from tensorflow.contrib.distributions.python.ops.vector_laplace_diag import * -from tensorflow.contrib.distributions.python.ops.vector_sinh_arcsinh_diag import * -from tensorflow.contrib.distributions.python.ops.wishart import * -from tensorflow.python.ops.distributions.bernoulli import * -from tensorflow.python.ops.distributions.beta import * -from tensorflow.python.ops.distributions.categorical import * -from tensorflow.python.ops.distributions.dirichlet import * -from tensorflow.python.ops.distributions.dirichlet_multinomial import * -from tensorflow.python.ops.distributions.distribution import * -from tensorflow.python.ops.distributions.exponential import * -from tensorflow.python.ops.distributions.gamma import * -from tensorflow.python.ops.distributions.kullback_leibler import * -from tensorflow.python.ops.distributions.laplace import * 
-from tensorflow.python.ops.distributions.multinomial import * -from tensorflow.python.ops.distributions.normal import * -from tensorflow.python.ops.distributions.student_t import * -from tensorflow.python.ops.distributions.transformed_distribution import * -from tensorflow.python.ops.distributions.uniform import * + +# pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member,g-import-not-at-top + +with deprecation.silence(): + from tensorflow.contrib.distributions.python.ops import bijectors + from tensorflow.contrib.distributions.python.ops.autoregressive import * + from tensorflow.contrib.distributions.python.ops.batch_reshape import * + from tensorflow.contrib.distributions.python.ops.binomial import * + from tensorflow.contrib.distributions.python.ops.cauchy import * + from tensorflow.contrib.distributions.python.ops.chi2 import * + from tensorflow.contrib.distributions.python.ops.conditional_distribution import * + from tensorflow.contrib.distributions.python.ops.conditional_transformed_distribution import * + from tensorflow.contrib.distributions.python.ops.deterministic import * + from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular + from tensorflow.contrib.distributions.python.ops.distribution_util import fill_triangular_inverse + from tensorflow.contrib.distributions.python.ops.distribution_util import matrix_diag_transform + from tensorflow.contrib.distributions.python.ops.distribution_util import reduce_weighted_logsumexp + from tensorflow.contrib.distributions.python.ops.distribution_util import softplus_inverse + from tensorflow.contrib.distributions.python.ops.distribution_util import tridiag + from tensorflow.contrib.distributions.python.ops.estimator import * + from tensorflow.contrib.distributions.python.ops.geometric import * + from tensorflow.contrib.distributions.python.ops.half_normal import * + from tensorflow.contrib.distributions.python.ops.independent import * + from 
tensorflow.contrib.distributions.python.ops.inverse_gamma import * + from tensorflow.contrib.distributions.python.ops.kumaraswamy import * + from tensorflow.contrib.distributions.python.ops.logistic import * + from tensorflow.contrib.distributions.python.ops.mixture import * + from tensorflow.contrib.distributions.python.ops.mixture_same_family import * + from tensorflow.contrib.distributions.python.ops.moving_stats import * + from tensorflow.contrib.distributions.python.ops.mvn_diag import * + from tensorflow.contrib.distributions.python.ops.mvn_diag_plus_low_rank import * + from tensorflow.contrib.distributions.python.ops.mvn_full_covariance import * + from tensorflow.contrib.distributions.python.ops.mvn_tril import * + from tensorflow.contrib.distributions.python.ops.negative_binomial import * + from tensorflow.contrib.distributions.python.ops.normal_conjugate_posteriors import * + from tensorflow.contrib.distributions.python.ops.onehot_categorical import * + from tensorflow.contrib.distributions.python.ops.poisson import * + from tensorflow.contrib.distributions.python.ops.poisson_lognormal import * + from tensorflow.contrib.distributions.python.ops.quantized_distribution import * + from tensorflow.contrib.distributions.python.ops.relaxed_bernoulli import * + from tensorflow.contrib.distributions.python.ops.relaxed_onehot_categorical import * + from tensorflow.contrib.distributions.python.ops.sample_stats import * + from tensorflow.contrib.distributions.python.ops.seed_stream import * + from tensorflow.contrib.distributions.python.ops.sinh_arcsinh import * + from tensorflow.contrib.distributions.python.ops.test_util import * + from tensorflow.contrib.distributions.python.ops.vector_diffeomixture import * + from tensorflow.contrib.distributions.python.ops.vector_exponential_diag import * + from tensorflow.contrib.distributions.python.ops.vector_laplace_diag import * + from tensorflow.contrib.distributions.python.ops.vector_sinh_arcsinh_diag import * + from 
tensorflow.contrib.distributions.python.ops.wishart import * + from tensorflow.python.ops.distributions.bernoulli import * + from tensorflow.python.ops.distributions.beta import * + from tensorflow.python.ops.distributions.categorical import * + from tensorflow.python.ops.distributions.dirichlet import * + from tensorflow.python.ops.distributions.dirichlet_multinomial import * + from tensorflow.python.ops.distributions.distribution import * + from tensorflow.python.ops.distributions.exponential import * + from tensorflow.python.ops.distributions.gamma import * + from tensorflow.python.ops.distributions.kullback_leibler import * + from tensorflow.python.ops.distributions.laplace import * + from tensorflow.python.ops.distributions.multinomial import * + from tensorflow.python.ops.distributions.normal import * + from tensorflow.python.ops.distributions.student_t import * + from tensorflow.python.ops.distributions.transformed_distribution import * + from tensorflow.python.ops.distributions.uniform import * # pylint: enable=unused-import,wildcard-import,line-too-long,g-importing-member diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 33a1d572a20e68479d3ec1147d4892449e7beb8a..b35ac3abe9a6f37f09c58a311852cf666345aa87 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -249,11 +249,10 @@ py_library( ], ) -py_test( +cuda_py_test( name = "remote_test", srcs = ["remote_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":parameter_server", ":remote", "//tensorflow/contrib/eager/python:tfe", diff --git a/tensorflow/contrib/eager/python/remote_test.py b/tensorflow/contrib/eager/python/remote_test.py index 7aa4b598b833c3419af501b49f1509d18f3530d5..3926de15e71c9917f88fc3f58740b8c75354ab26 100644 --- a/tensorflow/contrib/eager/python/remote_test.py +++ b/tensorflow/contrib/eager/python/remote_test.py @@ -206,6 +206,33 @@ class RemoteExecutionTest(test.TestCase): y = 
math_ops.matmul(x1, x2) np.testing.assert_array_equal([[2, 2], [2, 2]], y.numpy()) + @run_sync_and_async + def testContextDeviceUpdated(self): + """Tests that the context device is correctly updated.""" + + with ops.device("cpu:0"): + x1 = array_ops.ones([2, 2]) + x2 = array_ops.ones([2, 2]) + y = math_ops.matmul(x1, x2) + np.testing.assert_array_equal([[2, 2], [2, 2]], y.numpy()) + + # `y` is placed on the local CPU as expected. + self.assertEqual(y.device, + "/job:%s/replica:0/task:0/device:CPU:0" % JOB_NAME) + + @run_sync_and_async + def testGPUToRemoteCopy(self): + """Tests that the remote copy happens satisfactorily.""" + if not context.context().num_gpus(): + self.skipTest("No GPUs.") + + x1 = array_ops.ones([2, 2]).gpu() + + with ops.device("/job:remote_device/replica:0/task:1/device:CPU:0"): + x2 = x1._copy() # pylint: disable=protected-access + + np.testing.assert_array_equal(x1.numpy(), x2.numpy()) + if __name__ == "__main__": ops.enable_eager_execution() diff --git a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py index a1f1c5f3d7a25ad28c58e9c215b862b6d51f4cd8..b131ed4f12a01a0087390b5bb65f3ac2d5aec657 100644 --- a/tensorflow/contrib/estimator/python/estimator/boosted_trees.py +++ b/tensorflow/contrib/estimator/python/estimator/boosted_trees.py @@ -75,7 +75,7 @@ class _BoostedTreesEstimator(canned_boosted_trees._BoostedTreesBase): # pylint: layer. head: the `Head` instance defined for Estimator. model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator + also be used to load checkpoints from the directory into an estimator to continue training a previously saved model. weight_column: A string or a `_NumericColumn` created by `tf.feature_column.numeric_column` defining feature column representing @@ -199,7 +199,7 @@ def boosted_trees_classifier_train_in_memory( the model. 
All items in the set should be instances of classes derived from `FeatureColumn`. model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator + also be used to load checkpoints from the directory into an estimator to continue training a previously saved model. n_classes: number of label classes. Default is binary classification. Multiclass support is not yet implemented. @@ -345,7 +345,7 @@ def boosted_trees_regressor_train_in_memory( the model. All items in the set should be instances of classes derived from `FeatureColumn`. model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator + also be used to load checkpoints from the directory into an estimator to continue training a previously saved model. label_dimension: Number of regression targets per example. Multi-dimensional support is not yet implemented. diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py index 724bc2c82f8289bbaa19a1dbbc1dc81b6e158e02..4e7965ef265022214f88ed74f4c8502fc8a4c897 100644 --- a/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py +++ b/tensorflow/contrib/estimator/python/estimator/dnn_linear_combined.py @@ -118,7 +118,7 @@ class DNNLinearCombinedEstimator(estimator.Estimator): head: A `_Head` instance constructed with a method such as `tf.contrib.estimator.multi_label_head`. model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator + also be used to load checkpoints from the directory into an estimator to continue training a previously saved model. linear_feature_columns: An iterable containing all the feature columns used by linear part of the model. 
All items in the set must be diff --git a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py index 6ca7aaf98972c76c608c9c397a82ca94286a2656..40a91175b71f27bb9ca72a238a5aea172cf4c360 100644 --- a/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py +++ b/tensorflow/contrib/estimator/python/estimator/dnn_with_layer_annotations.py @@ -248,7 +248,7 @@ def DNNClassifierWithLayerAnnotations( # pylint: disable=invalid-name model. All items in the set should be instances of classes derived from `_FeatureColumn`. model_dir: Directory to save model parameters, graph and etc. This can also - be used to load checkpoints from the directory into a estimator to + be used to load checkpoints from the directory into an estimator to continue training a previously saved model. n_classes: Number of label classes. Defaults to 2, namely binary classification. Must be > 1. diff --git a/tensorflow/contrib/estimator/python/estimator/rnn.py b/tensorflow/contrib/estimator/python/estimator/rnn.py index 98660bb7317ae76a7da7c90a5c890ab8e69037fe..c595f473950e28cd75cd1b56c1b3d409333dbc74 100644 --- a/tensorflow/contrib/estimator/python/estimator/rnn.py +++ b/tensorflow/contrib/estimator/python/estimator/rnn.py @@ -30,7 +30,6 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.layers import core as core_layers from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import partitioned_variables @@ -92,55 +91,6 @@ def _make_rnn_cell_fn(num_units, cell_type='basic_rnn'): return rnn_cell_fn -def _concatenate_context_input(sequence_input, context_input): - """Replicates `context_input` across all timesteps of `sequence_input`. 
- - Expands dimension 1 of `context_input` then tiles it `sequence_length` times. - This value is appended to `sequence_input` on dimension 2 and the result is - returned. - - Args: - sequence_input: A `Tensor` of dtype `float32` and shape `[batch_size, - padded_length, d0]`. - context_input: A `Tensor` of dtype `float32` and shape `[batch_size, d1]`. - - Returns: - A `Tensor` of dtype `float32` and shape `[batch_size, padded_length, - d0 + d1]`. - - Raises: - ValueError: If `sequence_input` does not have rank 3 or `context_input` does - not have rank 2. - """ - seq_rank_check = check_ops.assert_rank( - sequence_input, - 3, - message='sequence_input must have rank 3', - data=[array_ops.shape(sequence_input)]) - seq_type_check = check_ops.assert_type( - sequence_input, - dtypes.float32, - message='sequence_input must have dtype float32; got {}.'.format( - sequence_input.dtype)) - ctx_rank_check = check_ops.assert_rank( - context_input, - 2, - message='context_input must have rank 2', - data=[array_ops.shape(context_input)]) - ctx_type_check = check_ops.assert_type( - context_input, - dtypes.float32, - message='context_input must have dtype float32; got {}.'.format( - context_input.dtype)) - with ops.control_dependencies( - [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]): - padded_length = array_ops.shape(sequence_input)[1] - tiled_context_input = array_ops.tile( - array_ops.expand_dims(context_input, 1), - array_ops.concat([[1], [padded_length], [1]], 0)) - return array_ops.concat([sequence_input, tiled_context_input], 2) - - def _select_last_activations(activations, sequence_lengths): """Selects the nth set of activations for each n in `sequence_length`. 
@@ -222,8 +172,8 @@ def _rnn_logit_fn_builder(output_units, rnn_cell_fn, sequence_feature_columns, context_input = feature_column_lib.input_layer( features=features, feature_columns=context_feature_columns) - sequence_input = _concatenate_context_input(sequence_input, - context_input) + sequence_input = seq_fc.concatenate_context_input( + context_input, sequence_input) cell = rnn_cell_fn(mode) # Ignore output state. diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD index aab7d0c9e8874269bfa5f33193b0dc0ba4bbc9cd..a926ffd5982116a21dc7a0fd1ff957d4ecc6bf94 100644 --- a/tensorflow/contrib/feature_column/BUILD +++ b/tensorflow/contrib/feature_column/BUILD @@ -27,6 +27,7 @@ py_library( "//tensorflow/python:check_ops", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", "//tensorflow/python:parsing_ops", "//tensorflow/python:sparse_ops", "//tensorflow/python:tensor_shape", @@ -46,9 +47,29 @@ py_test( "//tensorflow/python:dtypes", "//tensorflow/python:errors", "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:parsing_ops", "//tensorflow/python:sparse_tensor", "//tensorflow/python:training", "//tensorflow/python/feature_column", "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", + ], +) + +py_test( + name = "sequence_feature_column_integration_test", + srcs = ["python/feature_column/sequence_feature_column_integration_test.py"], + srcs_version = "PY2AND3", + tags = ["no_pip"], + deps = [ + ":sequence_feature_column", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_ops", + "//tensorflow/python:parsing_ops", + "//tensorflow/python:training", + "//tensorflow/python:util", + "//tensorflow/python/feature_column", + "//tensorflow/python/keras:layers", ], ) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py 
b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index 05bcdac2caa77062f9a8a44a948d2897b439ea1f..dd6da35ed009c07ad3819e7860a283c7837c1f83 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -33,7 +33,6 @@ from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variable_scope # pylint: disable=protected-access -# TODO(b/73827486): Support SequenceExample. def sequence_input_layer( @@ -110,6 +109,7 @@ def sequence_input_layer( output_tensors = [] sequence_lengths = [] ordered_columns = [] + for column in sorted(feature_columns, key=lambda x: x.name): ordered_columns.append(column) with variable_scope.variable_scope( @@ -121,17 +121,67 @@ def sequence_input_layer( # Flattens the final dimension to produce a 3D Tensor. num_elements = column._variable_shape.num_elements() shape = array_ops.shape(dense_tensor) + target_shape = [shape[0], shape[1], num_elements] output_tensors.append( - array_ops.reshape( - dense_tensor, - shape=array_ops.concat([shape[:2], [num_elements]], axis=0))) + array_ops.reshape(dense_tensor, shape=target_shape)) sequence_lengths.append(sequence_length) + fc._verify_static_batch_size_equality(output_tensors, ordered_columns) fc._verify_static_batch_size_equality(sequence_lengths, ordered_columns) sequence_length = _assert_all_equal_and_return(sequence_lengths) + return array_ops.concat(output_tensors, -1), sequence_length +def concatenate_context_input(context_input, sequence_input): + """Replicates `context_input` across all timesteps of `sequence_input`. + + Expands dimension 1 of `context_input` then tiles it `sequence_length` times. + This value is appended to `sequence_input` on dimension 2 and the result is + returned. + + Args: + context_input: A `Tensor` of dtype `float32` and shape `[batch_size, d1]`. 
+ sequence_input: A `Tensor` of dtype `float32` and shape `[batch_size, + padded_length, d0]`. + + Returns: + A `Tensor` of dtype `float32` and shape `[batch_size, padded_length, + d0 + d1]`. + + Raises: + ValueError: If `sequence_input` does not have rank 3 or `context_input` does + not have rank 2. + """ + seq_rank_check = check_ops.assert_rank( + sequence_input, + 3, + message='sequence_input must have rank 3', + data=[array_ops.shape(sequence_input)]) + seq_type_check = check_ops.assert_type( + sequence_input, + dtypes.float32, + message='sequence_input must have dtype float32; got {}.'.format( + sequence_input.dtype)) + ctx_rank_check = check_ops.assert_rank( + context_input, + 2, + message='context_input must have rank 2', + data=[array_ops.shape(context_input)]) + ctx_type_check = check_ops.assert_type( + context_input, + dtypes.float32, + message='context_input must have dtype float32; got {}.'.format( + context_input.dtype)) + with ops.control_dependencies( + [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]): + padded_length = array_ops.shape(sequence_input)[1] + tiled_context_input = array_ops.tile( + array_ops.expand_dims(context_input, 1), + array_ops.concat([[1], [padded_length], [1]], 0)) + return array_ops.concat([sequence_input, tiled_context_input], 2) + + def sequence_categorical_column_with_identity( key, num_buckets, default_value=None): """Returns a feature column that represents sequences of integers. 
@@ -453,9 +503,17 @@ class _SequenceNumericColumn( [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape], axis=0) dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape) - sequence_length = fc._sequence_length_from_sparse_tensor( - sp_tensor, num_elements=self._variable_shape.num_elements()) + + # Get the number of timesteps per example + # For the 2D case, the raw values are grouped according to num_elements; + # for the 3D case, the grouping happens in the third dimension, and + # sequence length is not affected. + num_elements = (self._variable_shape.num_elements() + if sp_tensor.shape.ndims == 2 else 1) + seq_length = fc._sequence_length_from_sparse_tensor( + sp_tensor, num_elements=num_elements) + return fc._SequenceDenseColumn.TensorSequenceLengthPair( - dense_tensor=dense_tensor, sequence_length=sequence_length) + dense_tensor=dense_tensor, sequence_length=seq_length) # pylint: enable=protected-access diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py new file mode 100644 index 0000000000000000000000000000000000000000..d8ca363627eace15e039679545366648df174c33 --- /dev/null +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_integration_test.py @@ -0,0 +1,280 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Integration test for sequence feature columns with SequenceExamples.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import string +import tempfile + +from google.protobuf import text_format + +from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc +from tensorflow.core.example import example_pb2 +from tensorflow.core.example import feature_pb2 +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.feature_column import feature_column as fc +from tensorflow.python.keras.layers import recurrent +from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.util import compat + + +class SequenceFeatureColumnIntegrationTest(test.TestCase): + + def _make_sequence_example(self): + example = example_pb2.SequenceExample() + example.context.feature['int_ctx'].int64_list.value.extend([5]) + example.context.feature['float_ctx'].float_list.value.extend([123.6]) + for val in range(0, 10, 2): + feat = feature_pb2.Feature() + feat.int64_list.value.extend([val] * val) + example.feature_lists.feature_list['int_list'].feature.extend([feat]) + for val in range(1, 11, 2): + feat = feature_pb2.Feature() + feat.bytes_list.value.extend([compat.as_bytes(str(val))] * val) + example.feature_lists.feature_list['str_list'].feature.extend([feat]) + + return example + + def _build_feature_columns(self): + col = fc.categorical_column_with_identity( + 'int_ctx', num_buckets=100) + ctx_cols = [ + fc.embedding_column(col, dimension=10), + fc.numeric_column('float_ctx')] + + identity_col = sfc.sequence_categorical_column_with_identity( + 'int_list', num_buckets=10) + bucket_col = 
sfc.sequence_categorical_column_with_hash_bucket( + 'bytes_list', hash_bucket_size=100) + seq_cols = [ + fc.embedding_column(identity_col, dimension=10), + fc.embedding_column(bucket_col, dimension=20)] + + return ctx_cols, seq_cols + + def test_sequence_example_into_input_layer(self): + examples = [_make_sequence_example().SerializeToString()] * 100 + ctx_cols, seq_cols = self._build_feature_columns() + + def _parse_example(example): + ctx, seq = parsing_ops.parse_single_sequence_example( + example, + context_features=fc.make_parse_example_spec(ctx_cols), + sequence_features=fc.make_parse_example_spec(seq_cols)) + ctx.update(seq) + return ctx + + ds = dataset_ops.Dataset.from_tensor_slices(examples) + ds = ds.map(_parse_example) + ds = ds.batch(20) + + # Test on a single batch + features = ds.make_one_shot_iterator().get_next() + + # Tile the context features across the sequence features + seq_layer, _ = sfc.sequence_input_layer(features, seq_cols) + ctx_layer = fc.input_layer(features, ctx_cols) + input_layer = sfc.concatenate_context_input(ctx_layer, seq_layer) + + rnn_layer = recurrent.RNN(recurrent.SimpleRNNCell(10)) + output = rnn_layer(input_layer) + + with self.cached_session() as sess: + sess.run(variables.global_variables_initializer()) + features_r = sess.run(features) + self.assertAllEqual(features_r['int_list'].dense_shape, [20, 3, 6]) + + output_r = sess.run(output) + self.assertAllEqual(output_r.shape, [20, 10]) + + +class SequenceExampleParsingTest(test.TestCase): + + def test_seq_ex_in_sequence_categorical_column_with_identity(self): + self._test_parsed_sequence_example( + 'int_list', sfc.sequence_categorical_column_with_identity, + 10, [3, 6], [2, 4, 6]) + + def test_seq_ex_in_sequence_categorical_column_with_hash_bucket(self): + self._test_parsed_sequence_example( + 'bytes_list', sfc.sequence_categorical_column_with_hash_bucket, + 10, [3, 4], [compat.as_bytes(x) for x in 'acg']) + + def 
test_seq_ex_in_sequence_categorical_column_with_vocabulary_list(self): + self._test_parsed_sequence_example( + 'bytes_list', sfc.sequence_categorical_column_with_vocabulary_list, + list(string.ascii_lowercase), [3, 4], + [compat.as_bytes(x) for x in 'acg']) + + def test_seq_ex_in_sequence_categorical_column_with_vocabulary_file(self): + _, fname = tempfile.mkstemp() + with open(fname, 'w') as f: + f.write(string.ascii_lowercase) + self._test_parsed_sequence_example( + 'bytes_list', sfc.sequence_categorical_column_with_vocabulary_file, + fname, [3, 4], [compat.as_bytes(x) for x in 'acg']) + + def _test_parsed_sequence_example( + self, col_name, col_fn, col_arg, shape, values): + """Helper function to check that each FeatureColumn parses correctly. + + Args: + col_name: string, name to give to the feature column. Should match + the name that the column will parse out of the features dict. + col_fn: function used to create the feature column. For example, + sequence_numeric_column. + col_arg: second arg that the target feature column is expecting. + shape: the expected dense_shape of the feature after parsing into + a SparseTensor. + values: the expected values at index [0, 2, 6] of the feature + after parsing into a SparseTensor. 
+ """ + example = _make_sequence_example() + columns = [ + fc.categorical_column_with_identity('int_ctx', num_buckets=100), + fc.numeric_column('float_ctx'), + col_fn(col_name, col_arg) + ] + context, seq_features = parsing_ops.parse_single_sequence_example( + example.SerializeToString(), + context_features=fc.make_parse_example_spec(columns[:2]), + sequence_features=fc.make_parse_example_spec(columns[2:])) + + with self.cached_session() as sess: + ctx_result, seq_result = sess.run([context, seq_features]) + self.assertEqual(list(seq_result[col_name].dense_shape), shape) + self.assertEqual( + list(seq_result[col_name].values[[0, 2, 6]]), values) + self.assertEqual(list(ctx_result['int_ctx'].dense_shape), [1]) + self.assertEqual(ctx_result['int_ctx'].values[0], 5) + self.assertEqual(list(ctx_result['float_ctx'].shape), [1]) + self.assertAlmostEqual(ctx_result['float_ctx'][0], 123.6, places=1) + + +_SEQ_EX_PROTO = """ +context { + feature { + key: "float_ctx" + value { + float_list { + value: 123.6 + } + } + } + feature { + key: "int_ctx" + value { + int64_list { + value: 5 + } + } + } +} +feature_lists { + feature_list { + key: "bytes_list" + value { + feature { + bytes_list { + value: "a" + } + } + feature { + bytes_list { + value: "b" + value: "c" + } + } + feature { + bytes_list { + value: "d" + value: "e" + value: "f" + value: "g" + } + } + } + } + feature_list { + key: "float_list" + value { + feature { + float_list { + value: 1.0 + } + } + feature { + float_list { + value: 3.0 + value: 3.0 + value: 3.0 + } + } + feature { + float_list { + value: 5.0 + value: 5.0 + value: 5.0 + value: 5.0 + value: 5.0 + } + } + } + } + feature_list { + key: "int_list" + value { + feature { + int64_list { + value: 2 + value: 2 + } + } + feature { + int64_list { + value: 4 + value: 4 + value: 4 + value: 4 + } + } + feature { + int64_list { + value: 6 + value: 6 + value: 6 + value: 6 + value: 6 + value: 6 + } + } + } + } +} +""" + + +def _make_sequence_example(): + example = 
example_pb2.SequenceExample() + return text_format.Parse(_SEQ_EX_PROTO, example) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index 45d7b740462ca21139e2e93e34b43668f1e08a94..707f93b2da5d24a3c1e5c6097a21d8fed4c11b8b 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import os +from absl.testing import parameterized import numpy as np from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc @@ -28,28 +29,63 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import math_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.platform import test from tensorflow.python.training import monitored_session -class SequenceInputLayerTest(test.TestCase): +class SequenceInputLayerTest(test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input_args_a': { + # example 0, ids [2] + # example 1, ids [0, 1] + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (2, 0, 1), + 'dense_shape': (2, 2)}, + 'sparse_input_args_b': { + # example 0, ids [1] + # example 1, ids [2, 0] + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (1, 2, 0), + 'dense_shape': (2, 2)}, + 'expected_input_layer': [ + # example 0, ids_a [2], ids_b [1] + [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]], + # example 1, ids_a [0, 1], ids_b [2, 0] + [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]],], + 
'expected_sequence_length': [1, 2]}, + {'testcase_name': '3D', + 'sparse_input_args_a': { + # feature 0, ids [[2], [0, 1]] + # feature 1, ids [[0, 0], [1]] + 'indices': ( + (0, 0, 0), (0, 1, 0), (0, 1, 1), + (1, 0, 0), (1, 0, 1), (1, 1, 0)), + 'values': (2, 0, 1, 0, 0, 1), + 'dense_shape': (2, 2, 2)}, + 'sparse_input_args_b': { + # feature 0, ids [[1, 1], [1]] + # feature 1, ids [[2], [0]] + 'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + 'values': (1, 1, 1, 2, 0), + 'dense_shape': (2, 2, 2)}, + 'expected_input_layer': [ + # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -] + [[5., 6., 14., 15., 16.], [2., 3., 14., 15., 16.]], + # feature 1, [a: 0, 0, b: 2, -], [a: 1, -, b: 0, -] + [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]]], + 'expected_sequence_length': [2, 2]}, + ) + def test_embedding_column( + self, sparse_input_args_a, sparse_input_args_b, expected_input_layer, + expected_sequence_length): - def test_embedding_column(self): + sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a) + sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b) vocabulary_size = 3 - sparse_input_a = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - sparse_input_b = sparse_tensor.SparseTensorValue( - # example 0, ids [1] - # example 1, ids [2, 0] - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 0), - dense_shape=(2, 2)) - embedding_dimension_a = 2 embedding_values_a = ( (1., 2.), # id 0 @@ -70,14 +106,6 @@ class SequenceInputLayerTest(test.TestCase): return embedding_values return _initializer - expected_input_layer = [ - # example 0, ids_a [2], ids_b [1] - [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]], - # example 1, ids_a [0, 1], ids_b [2, 0] - [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]], - ] - expected_sequence_length = [1, 2] - categorical_column_a = sfc.sequence_categorical_column_with_identity( 
key='aaa', num_buckets=vocabulary_size) embedding_column_a = fc.embedding_column( @@ -233,29 +261,56 @@ class SequenceInputLayerTest(test.TestCase): }, feature_columns=shared_embedding_columns) - def test_indicator_column(self): + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input_args_a': { + # example 0, ids [2] + # example 1, ids [0, 1] + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (2, 0, 1), + 'dense_shape': (2, 2)}, + 'sparse_input_args_b': { + # example 0, ids [1] + # example 1, ids [1, 0] + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (1, 1, 0), + 'dense_shape': (2, 2)}, + 'expected_input_layer': [ + # example 0, ids_a [2], ids_b [1] + [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]], + # example 1, ids_a [0, 1], ids_b [1, 0] + [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]], + 'expected_sequence_length': [1, 2]}, + {'testcase_name': '3D', + 'sparse_input_args_a': { + # feature 0, ids [[2], [0, 1]] + # feature 1, ids [[0, 0], [1]] + 'indices': ( + (0, 0, 0), (0, 1, 0), (0, 1, 1), + (1, 0, 0), (1, 0, 1), (1, 1, 0)), + 'values': (2, 0, 1, 0, 0, 1), + 'dense_shape': (2, 2, 2)}, + 'sparse_input_args_b': { + # feature 0, ids [[1, 1], [1]] + # feature 1, ids [[1], [0]] + 'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + 'values': (1, 1, 1, 1, 0), + 'dense_shape': (2, 2, 2)}, + 'expected_input_layer': [ + # feature 0, [a: 2, -, b: 1, 1], [a: 0, 1, b: 1, -] + [[0., 0., 1., 0., 2.], [1., 1., 0., 0., 1.]], + # feature 1, [a: 0, 0, b: 1, -], [a: 1, -, b: 0, -] + [[2., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]]], + 'expected_sequence_length': [2, 2]}, + ) + def test_indicator_column( + self, sparse_input_args_a, sparse_input_args_b, expected_input_layer, + expected_sequence_length): + sparse_input_a = sparse_tensor.SparseTensorValue(**sparse_input_args_a) + sparse_input_b = sparse_tensor.SparseTensorValue(**sparse_input_args_b) + vocabulary_size_a = 3 - sparse_input_a = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - 
# example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) vocabulary_size_b = 2 - sparse_input_b = sparse_tensor.SparseTensorValue( - # example 0, ids [1] - # example 1, ids [1, 0] - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 1, 0), - dense_shape=(2, 2)) - - expected_input_layer = [ - # example 0, ids_a [2], ids_b [1] - [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]], - # example 1, ids_a [0, 1], ids_b [1, 0] - [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]], - ] - expected_sequence_length = [1, 2] categorical_column_a = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size_a) @@ -298,18 +353,34 @@ class SequenceInputLayerTest(test.TestCase): features={'aaa': sparse_input}, feature_columns=[indicator_column_a]) - def test_numeric_column(self): - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0.], [1]] - # example 1, [[10.]] - indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)) - expected_input_layer = [ - [[0.], [1.]], - [[10.], [0.]], - ] - expected_sequence_length = [2, 1] + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input_args': { + # example 0, values [0., 1] + # example 1, [10.] 
+ 'indices': ((0, 0), (0, 1), (1, 0)), + 'values': (0., 1., 10.), + 'dense_shape': (2, 2)}, + 'expected_input_layer': [ + [[0.], [1.]], + [[10.], [0.]]], + 'expected_sequence_length': [2, 1]}, + {'testcase_name': '3D', + 'sparse_input_args': { + # feature 0, ids [[20, 3], [5]] + # feature 1, ids [[3], [8]] + 'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + 'values': (20, 3, 5., 3., 8.), + 'dense_shape': (2, 2, 2)}, + 'expected_input_layer': [ + [[20.], [3.], [5.], [0.]], + [[3.], [0.], [8.], [0.]]], + 'expected_sequence_length': [2, 2]}, + ) + def test_numeric_column( + self, sparse_input_args, expected_input_layer, expected_sequence_length): + sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args) + numeric_column = sfc.sequence_numeric_column('aaa') input_layer, sequence_length = sfc.sequence_input_layer( @@ -321,21 +392,40 @@ class SequenceInputLayerTest(test.TestCase): self.assertAllEqual( expected_sequence_length, sequence_length.eval(session=sess)) - def test_numeric_column_multi_dim(self): + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input_args': { + # example 0, values [0., 1., 2., 3., 4., 5., 6., 7.] + # example 1, [10., 11., 12., 13.] + 'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), + (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), + 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + 'dense_shape': (2, 8)}, + 'expected_input_layer': [ + # The output of numeric_column._get_dense_tensor should be flattened. 
+ [[0., 1., 2., 3.], [4., 5., 6., 7.]], + [[10., 11., 12., 13.], [0., 0., 0., 0.]]], + 'expected_sequence_length': [2, 1]}, + {'testcase_name': '3D', + 'sparse_input_args': { + # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]] + # example 1, [[10., 11., 12., 13.], []] + 'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), + (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3), + (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)), + 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + 'dense_shape': (2, 2, 4)}, + 'expected_input_layer': [ + # The output of numeric_column._get_dense_tensor should be flattened. + [[0., 1., 2., 3.], [4., 5., 6., 7.]], + [[10., 11., 12., 13.], [0., 0., 0., 0.]]], + 'expected_sequence_length': [2, 1]}, + ) + def test_numeric_column_multi_dim( + self, sparse_input_args, expected_input_layer, expected_sequence_length): """Tests sequence_input_layer for multi-dimensional numeric_column.""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] - # example 1, [[[10., 11.], [12., 13.]]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), - (1, 0), (1, 1), (1, 2), (1, 3)), - values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - dense_shape=(2, 8)) - # The output of numeric_column._get_dense_tensor should be flattened. 
- expected_input_layer = [ - [[0., 1., 2., 3.], [4., 5., 6., 7.]], - [[10., 11., 12., 13.], [0., 0., 0., 0.]], - ] - expected_sequence_length = [2, 1] + sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args) + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) input_layer, sequence_length = sfc.sequence_input_layer( @@ -377,6 +467,138 @@ class SequenceInputLayerTest(test.TestCase): r'\[y \(sequence_input_layer/bbb/sequence_length:0\) = \] \[1 1\]'): sess.run(sequence_length) + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input_args': { + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + 'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), + (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), + 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + 'dense_shape': (2, 8)}, + 'expected_shape': [2, 2, 4]}, + {'testcase_name': '3D', + 'sparse_input_args': { + # example 0, values [[0., 1., 2., 3.]], [[4., 5., 6., 7.]] + # example 1, [[10., 11., 12., 13.], []] + 'indices': ((0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), + (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 2), + (1, 0, 0), (1, 0, 1), (1, 0, 2), (1, 0, 3)), + 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + 'dense_shape': (2, 2, 4)}, + 'expected_shape': [2, 2, 4]}, + ) + def test_static_shape_from_tensors_numeric( + self, sparse_input_args, expected_shape): + """Tests that we return a known static shape when we have one.""" + sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args) + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) + + input_layer, _ = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[numeric_column]) + shape = input_layer.get_shape() + self.assertEqual(shape, expected_shape) + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input_args': { + # example 0, ids [2] + # example 1, ids [0, 
1] + # example 2, ids [] + # example 3, ids [1] + 'indices': ((0, 0), (1, 0), (1, 1), (3, 0)), + 'values': (2, 0, 1, 1), + 'dense_shape': (4, 2)}, + 'expected_shape': [4, 2, 3]}, + {'testcase_name': '3D', + 'sparse_input_args': { + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + # example 2, ids [] + # example 3, ids [[1], [0, 2]] + 'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), + (3, 0, 0), (3, 1, 0), (3, 1, 1)), + 'values': (2, 0, 1, 2, 1, 0, 2), + 'dense_shape': (4, 2, 2)}, + 'expected_shape': [4, 2, 3]} + ) + def test_static_shape_from_tensors_indicator( + self, sparse_input_args, expected_shape): + """Tests that we return a known static shape when we have one.""" + sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args) + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=3) + indicator_column = fc.indicator_column(categorical_column) + + input_layer, _ = sfc.sequence_input_layer( + features={'aaa': sparse_input}, feature_columns=[indicator_column]) + shape = input_layer.get_shape() + self.assertEqual(shape, expected_shape) + + +class ConcatenateContextInputTest(test.TestCase, parameterized.TestCase): + """Tests the utility fn concatenate_context_input.""" + + def test_concatenate_context_input(self): + seq_input = ops.convert_to_tensor(np.arange(12).reshape(2, 3, 2)) + context_input = ops.convert_to_tensor(np.arange(10).reshape(2, 5)) + seq_input = math_ops.cast(seq_input, dtype=dtypes.float32) + context_input = math_ops.cast(context_input, dtype=dtypes.float32) + input_layer = sfc.concatenate_context_input(context_input, seq_input) + + expected = np.array([ + [[0, 1, 0, 1, 2, 3, 4], [2, 3, 0, 1, 2, 3, 4], [4, 5, 0, 1, 2, 3, 4]], + [[6, 7, 5, 6, 7, 8, 9], [8, 9, 5, 6, 7, 8, 9], [10, 11, 5, 6, 7, 8, 9]] + ], dtype=np.float32) + with monitored_session.MonitoredSession() as sess: + output = sess.run(input_layer) + self.assertAllEqual(expected, output) + + @parameterized.named_parameters( + 
{'testcase_name': 'rank_lt_3', + 'seq_input_arg': np.arange(100).reshape(10, 10)}, + {'testcase_name': 'rank_gt_3', + 'seq_input_arg': np.arange(100).reshape(5, 5, 2, 2)} + ) + def test_sequence_input_throws_error(self, seq_input_arg): + seq_input = ops.convert_to_tensor(seq_input_arg) + context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10)) + seq_input = math_ops.cast(seq_input, dtype=dtypes.float32) + context_input = math_ops.cast(context_input, dtype=dtypes.float32) + with self.assertRaisesRegexp(ValueError, 'sequence_input must have rank 3'): + sfc.concatenate_context_input(context_input, seq_input) + + @parameterized.named_parameters( + {'testcase_name': 'rank_lt_2', + 'context_input_arg': np.arange(100)}, + {'testcase_name': 'rank_gt_2', + 'context_input_arg': np.arange(100).reshape(5, 5, 4)} + ) + def test_context_input_throws_error(self, context_input_arg): + context_input = ops.convert_to_tensor(context_input_arg) + seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4)) + seq_input = math_ops.cast(seq_input, dtype=dtypes.float32) + context_input = math_ops.cast(context_input, dtype=dtypes.float32) + with self.assertRaisesRegexp(ValueError, 'context_input must have rank 2'): + sfc.concatenate_context_input(context_input, seq_input) + + def test_integer_seq_input_throws_error(self): + seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4)) + context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10)) + context_input = math_ops.cast(context_input, dtype=dtypes.float32) + with self.assertRaisesRegexp( + TypeError, 'sequence_input must have dtype float32'): + sfc.concatenate_context_input(context_input, seq_input) + + def test_integer_context_input_throws_error(self): + seq_input = ops.convert_to_tensor(np.arange(100).reshape(5, 5, 4)) + context_input = ops.convert_to_tensor(np.arange(100).reshape(10, 10)) + seq_input = math_ops.cast(seq_input, dtype=dtypes.float32) + with self.assertRaisesRegexp( + TypeError, 
'context_input must have dtype float32'): + sfc.concatenate_context_input(context_input, seq_input) + class InputLayerTest(test.TestCase): """Tests input_layer with sequence feature columns.""" @@ -443,75 +665,83 @@ def _assert_sparse_tensor_indices_shape(test_case, expected, actual): test_case.assertAllEqual(expected.dense_shape, actual.dense_shape) -class SequenceCategoricalColumnWithIdentityTest(test.TestCase): - - def test_get_sparse_tensors(self): - column = sfc.sequence_categorical_column_with_identity( - 'aaa', num_buckets=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(1, 2, 0), - dense_shape=(2, 2)) - expected_sparse_ids = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=np.array((1, 2, 0), dtype=np.int64), - dense_shape=(2, 2, 1)) +class SequenceCategoricalColumnWithIdentityTest( + test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs_args': { + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (1, 2, 0), + 'dense_shape': (2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)), + 'values': np.array((1, 2, 0), dtype=np.int64), + 'dense_shape': (2, 2, 1)}}, + {'testcase_name': '3D', + 'inputs_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 'values': (6, 7, 8), + 'dense_shape': (2, 2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 'values': (6, 7, 8), + 'dense_shape': (2, 2, 2)}} + ) + def test_get_sparse_tensors(self, inputs_args, expected_args): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) + expected = sparse_tensor.SparseTensorValue(**expected_args) + column = sfc.sequence_categorical_column_with_identity('aaa', num_buckets=9) id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) self.assertIsNone(id_weight_pair.weight_tensor) with monitored_session.MonitoredSession() as sess: _assert_sparse_tensor_value( - self, - 
expected_sparse_ids, - id_weight_pair.id_tensor.eval(session=sess)) - - def test_get_sparse_tensors_inputs3d(self): - """Tests _get_sparse_tensors when the input is already 3D Tensor.""" - column = sfc.sequence_categorical_column_with_identity( - 'aaa', num_buckets=3) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=(1, 2, 0), - dense_shape=(2, 2, 1)) - - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - r'Column aaa expected ID tensor of rank 2\.\s*' - r'id_tensor shape:\s*\[2 2 1\]'): - id_weight_pair = column._get_sparse_tensors( - _LazyBuilder({'aaa': inputs})) - with monitored_session.MonitoredSession() as sess: - id_weight_pair.id_tensor.eval(session=sess) - - -class SequenceCategoricalColumnWithHashBucketTest(test.TestCase): - - def test_get_sparse_tensors(self): + self, expected, id_weight_pair.id_tensor.eval(session=sess)) + + +class SequenceCategoricalColumnWithHashBucketTest( + test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs_args': { + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': ('omar', 'stringer', 'marlo'), + 'dense_shape': (2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)), + # Ignored to avoid hash dependence in test. + 'values': np.array((0, 0, 0), dtype=np.int64), + 'dense_shape': (2, 2, 1)}}, + {'testcase_name': '3D', + 'inputs_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 'values': ('omar', 'stringer', 'marlo'), + 'dense_shape': (2, 2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + # Ignored to avoid hash dependence in test. 
+ 'values': np.array((0, 0, 0), dtype=np.int64), + 'dense_shape': (2, 2, 2)}} + ) + def test_get_sparse_tensors(self, inputs_args, expected_args): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) + expected = sparse_tensor.SparseTensorValue(**expected_args) column = sfc.sequence_categorical_column_with_hash_bucket( 'aaa', hash_bucket_size=10) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('omar', 'stringer', 'marlo'), - dense_shape=(2, 2)) - - expected_sparse_ids = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - # Ignored to avoid hash dependence in test. - values=np.array((0, 0, 0), dtype=np.int64), - dense_shape=(2, 2, 1)) id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) self.assertIsNone(id_weight_pair.weight_tensor) with monitored_session.MonitoredSession() as sess: _assert_sparse_tensor_indices_shape( - self, - expected_sparse_ids, - id_weight_pair.id_tensor.eval(session=sess)) + self, expected, id_weight_pair.id_tensor.eval(session=sess)) -class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase): +class SequenceCategoricalColumnWithVocabularyFileTest( + test.TestCase, parameterized.TestCase): def _write_vocab(self, vocab_strings, file_name): vocab_file = os.path.join(self.get_temp_dir(), file_name) @@ -527,68 +757,125 @@ class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase): 'wire_vocabulary.txt') self._wire_vocabulary_size = 3 - def test_get_sparse_tensors(self): + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs_args': { + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': ('marlo', 'skywalker', 'omar'), + 'dense_shape': (2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)), + 'values': np.array((2, -1, 0), dtype=np.int64), + 'dense_shape': (2, 2, 1)}}, + {'testcase_name': '3D', + 'inputs_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 'values': ('omar', 'skywalker', 
'marlo'), + 'dense_shape': (2, 2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 'values': np.array((0, -1, 2), dtype=np.int64), + 'dense_shape': (2, 2, 2)}} + ) + def test_get_sparse_tensors(self, inputs_args, expected_args): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) + expected = sparse_tensor.SparseTensorValue(**expected_args) column = sfc.sequence_categorical_column_with_vocabulary_file( key='aaa', vocabulary_file=self._wire_vocabulary_file_name, vocabulary_size=self._wire_vocabulary_size) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - expected_sparse_ids = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=np.array((2, -1, 0), dtype=np.int64), - dense_shape=(2, 2, 1)) id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) self.assertIsNone(id_weight_pair.weight_tensor) with monitored_session.MonitoredSession() as sess: _assert_sparse_tensor_value( - self, - expected_sparse_ids, - id_weight_pair.id_tensor.eval(session=sess)) - - -class SequenceCategoricalColumnWithVocabularyListTest(test.TestCase): - - def test_get_sparse_tensors(self): + self, expected, id_weight_pair.id_tensor.eval(session=sess)) + + +class SequenceCategoricalColumnWithVocabularyListTest( + test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs_args': { + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': ('marlo', 'skywalker', 'omar'), + 'dense_shape': (2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 0), (1, 0, 0), (1, 1, 0)), + 'values': np.array((2, -1, 0), dtype=np.int64), + 'dense_shape': (2, 2, 1)}}, + {'testcase_name': '3D', + 'inputs_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 'values': ('omar', 'skywalker', 'marlo'), + 'dense_shape': (2, 2, 2)}, + 'expected_args': { + 'indices': ((0, 0, 2), (1, 0, 0), (1, 2, 0)), + 
'values': np.array((0, -1, 2), dtype=np.int64), + 'dense_shape': (2, 2, 2)}} + ) + def test_get_sparse_tensors(self, inputs_args, expected_args): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) + expected = sparse_tensor.SparseTensorValue(**expected_args) column = sfc.sequence_categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo')) - inputs = sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - expected_sparse_ids = sparse_tensor.SparseTensorValue( - indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), - values=np.array((2, -1, 0), dtype=np.int64), - dense_shape=(2, 2, 1)) id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) self.assertIsNone(id_weight_pair.weight_tensor) with monitored_session.MonitoredSession() as sess: _assert_sparse_tensor_value( - self, - expected_sparse_ids, - id_weight_pair.id_tensor.eval(session=sess)) - - -class SequenceEmbeddingColumnTest(test.TestCase): - - def test_get_sequence_dense_tensor(self): + self, expected, id_weight_pair.id_tensor.eval(session=sess)) + + +class SequenceEmbeddingColumnTest( + test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs_args': { + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + 'indices': ((0, 0), (1, 0), (1, 1), (3, 0)), + 'values': (2, 0, 1, 1), + 'dense_shape': (4, 2)}, + 'expected': [ + # example 0, ids [2] + [[7., 11.], [0., 0.]], + # example 1, ids [0, 1] + [[1., 2.], [3., 5.]], + # example 2, ids [] + [[0., 0.], [0., 0.]], + # example 3, ids [1] + [[3., 5.], [0., 0.]]]}, + {'testcase_name': '3D', + 'inputs_args': { + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + # example 2, ids [] + # example 3, ids [[1], [0, 2]] + 'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), + (3, 0, 0), (3, 1, 0), (3, 1, 1)), + 'values': (2, 0, 1, 2, 1, 0, 
2), + 'dense_shape': (4, 2, 2)}, + 'expected': [ + # example 0, ids [[2]] + [[7., 11.], [0., 0.]], + # example 1, ids [[0, 1], [2]] + [[2, 3.5], [7., 11.]], + # example 2, ids [] + [[0., 0.], [0., 0.]], + # example 3, ids [[1], [0, 2]] + [[3., 5.], [4., 6.5]]]} + ) + def test_get_sequence_dense_tensor(self, inputs_args, expected): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 1), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 2)) - embedding_dimension = 2 embedding_values = ( (1., 2.), # id 0 @@ -601,17 +888,6 @@ class SequenceEmbeddingColumnTest(test.TestCase): self.assertIsNone(partition_info) return embedding_values - expected_lookups = [ - # example 0, ids [2] - [[7., 11.], [0., 0.]], - # example 1, ids [0, 1] - [[1., 2.], [3., 5.]], - # example 2, ids [] - [[0., 0.], [0., 0.]], - # example 3, ids [1] - [[3., 5.], [0., 0.]], - ] - categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) embedding_column = fc.embedding_column( @@ -619,24 +895,36 @@ class SequenceEmbeddingColumnTest(test.TestCase): initializer=_initializer) embedding_lookup, _ = embedding_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) self.assertItemsEqual( ('embedding_weights:0',), tuple([v.name for v in global_vars])) with monitored_session.MonitoredSession() as sess: self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess)) - self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess)) - - def test_sequence_length(self): + self.assertAllEqual(expected, embedding_lookup.eval(session=sess)) + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs_args': { + # example 0, 
ids [2] + # example 1, ids [0, 1] + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (2, 0, 1), + 'dense_shape': (2, 2)}, + 'expected_sequence_length': [1, 2]}, + {'testcase_name': '3D', + 'inputs_args': { + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + 'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + 'values': (2, 0, 1, 2), + 'dense_shape': (2, 2, 2)}, + 'expected_sequence_length': [1, 2]} + ) + def test_sequence_length(self, inputs_args, expected_sequence_length): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - expected_sequence_length = [1, 2] categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) @@ -644,7 +932,7 @@ class SequenceEmbeddingColumnTest(test.TestCase): categorical_column, dimension=2) _, sequence_length = embedding_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: sequence_length = sess.run(sequence_length) @@ -855,56 +1143,89 @@ class SequenceSharedEmbeddingColumnTest(test.TestCase): expected_sequence_length_b, sequence_length_b.eval(session=sess)) -class SequenceIndicatorColumnTest(test.TestCase): - - def test_get_sequence_dense_tensor(self): +class SequenceIndicatorColumnTest(test.TestCase, parameterized.TestCase): + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs_args': { + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + 'indices': ((0, 0), (1, 0), (1, 1), (3, 0)), + 'values': (2, 0, 1, 1), + 'dense_shape': (4, 2)}, + 'expected': [ + # example 0, ids [2] + [[0., 0., 1.], [0., 0., 0.]], + # example 1, ids [0, 1] + [[1., 0., 0.], [0., 1., 0.]], + # example 2, ids [] + [[0., 0., 0.], [0., 0., 
0.]], + # example 3, ids [1] + [[0., 1., 0.], [0., 0., 0.]]]}, + {'testcase_name': '3D', + 'inputs_args': { + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + # example 2, ids [] + # example 3, ids [[1], [2, 2]] + 'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0), + (3, 0, 0), (3, 1, 0), (3, 1, 1)), + 'values': (2, 0, 1, 2, 1, 2, 2), + 'dense_shape': (4, 2, 2)}, + 'expected': [ + # example 0, ids [[2]] + [[0., 0., 1.], [0., 0., 0.]], + # example 1, ids [[0, 1], [2]] + [[1., 1., 0.], [0., 0., 1.]], + # example 2, ids [] + [[0., 0., 0.], [0., 0., 0.]], + # example 3, ids [[1], [2, 2]] + [[0., 1., 0.], [0., 0., 2.]]]} + ) + def test_get_sequence_dense_tensor(self, inputs_args, expected): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 1), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(4, 2)) - - expected_lookups = [ - # example 0, ids [2] - [[0., 0., 1.], [0., 0., 0.]], - # example 1, ids [0, 1] - [[1., 0., 0.], [0., 1., 0.]], - # example 2, ids [] - [[0., 0., 0.], [0., 0., 0.]], - # example 3, ids [1] - [[0., 1., 0.], [0., 0., 0.]], - ] categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) indicator_column = fc.indicator_column(categorical_column) indicator_tensor, _ = indicator_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: - self.assertAllEqual(expected_lookups, indicator_tensor.eval(session=sess)) - - def test_sequence_length(self): + self.assertAllEqual(expected, indicator_tensor.eval(session=sess)) + + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs_args': { + # example 0, ids [2] + # example 1, ids [0, 1] + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (2, 0, 
1), + 'dense_shape': (2, 2)}, + 'expected_sequence_length': [1, 2]}, + {'testcase_name': '3D', + 'inputs_args': { + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + 'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + 'values': (2, 0, 1, 2), + 'dense_shape': (2, 2, 2)}, + 'expected_sequence_length': [1, 2]} + ) + def test_sequence_length(self, inputs_args, expected_sequence_length): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - indices=((0, 0), (1, 0), (1, 1)), - values=(2, 0, 1), - dense_shape=(2, 2)) - expected_sequence_length = [1, 2] categorical_column = sfc.sequence_categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) indicator_column = fc.indicator_column(categorical_column) _, sequence_length = indicator_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: sequence_length = sess.run(sequence_length) @@ -938,7 +1259,7 @@ class SequenceIndicatorColumnTest(test.TestCase): expected_sequence_length, sequence_length.eval(session=sess)) -class SequenceNumericColumnTest(test.TestCase): +class SequenceNumericColumnTest(test.TestCase, parameterized.TestCase): def test_defaults(self): a = sfc.sequence_numeric_column('aaa') @@ -971,25 +1292,37 @@ class SequenceNumericColumnTest(test.TestCase): with self.assertRaisesRegexp(TypeError, 'must be a callable'): sfc.sequence_numeric_column('aaa', normalizer_fn='NotACallable') - def test_get_sequence_dense_tensor(self): - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0.], [1]] - # example 1, [[10.]] - indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)) - expected_dense_tensor = [ - [[0.], [1.]], - [[10.], [0.]], - ] + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs_args': { + # example 
0, values [0., 1] + # example 1, [10.] + 'indices': ((0, 0), (0, 1), (1, 0)), + 'values': (0., 1., 10.), + 'dense_shape': (2, 2)}, + 'expected': [ + [[0.], [1.]], + [[10.], [0.]]]}, + {'testcase_name': '3D', + 'inputs_args': { + # feature 0, ids [[20, 3], [5]] + # feature 1, ids [[3], [8]] + 'indices': ((0, 0, 0), (0, 0, 1), (0, 1, 0), (1, 0, 0), (1, 1, 0)), + 'values': (20, 3, 5., 3., 8.), + 'dense_shape': (2, 2, 2)}, + 'expected': [ + [[20.], [3.], [5.], [0.]], + [[3.], [0.], [8.], [0.]]]}, + ) + def test_get_sequence_dense_tensor(self, inputs_args, expected): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) numeric_column = sfc.sequence_numeric_column('aaa') dense_tensor, _ = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_dense_tensor, dense_tensor.eval(session=sess)) + self.assertAllEqual(expected, dense_tensor.eval(session=sess)) def test_get_sequence_dense_tensor_with_normalizer_fn(self): @@ -1026,41 +1359,35 @@ class SequenceNumericColumnTest(test.TestCase): self.assertAllEqual( expected_dense_tensor, dense_tensor.eval(session=sess)) - def test_get_sequence_dense_tensor_with_shape(self): - """Tests get_sequence_dense_tensor with shape !=(1,).""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0., 1., 2.], [3., 4., 5.]] - # example 1, [[10., 11., 12.]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), - (1, 0), (1, 1), (1, 2)), - values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), - dense_shape=(2, 6)) - expected_dense_tensor = [ - [[0., 1., 2.], [3., 4., 5.]], - [[10., 11., 12.], [0., 0., 0.]], - ] - numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,)) - - dense_tensor, _ = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_dense_tensor, 
dense_tensor.eval(session=sess)) - - def test_get_dense_tensor_multi_dim(self): + @parameterized.named_parameters( + {'testcase_name': '2D', + 'sparse_input_args': { + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + 'indices': ((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), + (0, 7), (1, 0), (1, 1), (1, 2), (1, 3)), + 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + 'dense_shape': (2, 8)}, + 'expected_dense_tensor': [ + [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]], + [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]]]}, + {'testcase_name': '3D', + 'sparse_input_args': { + 'indices': ((0, 0, 0), (0, 0, 2), (0, 0, 4), (0, 0, 6), + (0, 1, 0), (0, 1, 2), (0, 1, 4), (0, 1, 6), + (1, 0, 0), (1, 0, 2), (1, 0, 4), (1, 0, 6)), + 'values': (0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + 'dense_shape': (2, 2, 8)}, + 'expected_dense_tensor': [ + [[[0., 0.], [1., 0.]], [[2., 0.], [3., 0.]], + [[4., 0.], [5., 0.]], [[6., 0.], [7., 0.]]], + [[[10., 0.], [11., 0.]], [[12., 0.], [13., 0.]], + [[0., 0.], [0., 0.]], [[0., 0.], [0., 0.]]]]}, + ) + def test_get_dense_tensor_multi_dim( + self, sparse_input_args, expected_dense_tensor): """Tests get_sequence_dense_tensor for multi-dim numeric_column.""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] - # example 1, [[[10., 11.], [12., 13.]]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), - (1, 0), (1, 1), (1, 2), (1, 3)), - values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), - dense_shape=(2, 8)) - expected_dense_tensor = [ - [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]], - [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]], - ] + sparse_input = sparse_tensor.SparseTensorValue(**sparse_input_args) numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) dense_tensor, _ = numeric_column._get_sequence_dense_tensor( @@ -1070,43 +1397,56 @@ 
class SequenceNumericColumnTest(test.TestCase): self.assertAllEqual( expected_dense_tensor, dense_tensor.eval(session=sess)) - def test_sequence_length(self): - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0., 1., 2.], [3., 4., 5.]] - # example 1, [[10., 11., 12.]] - indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), - (1, 0), (1, 1), (1, 2)), - values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), - dense_shape=(2, 6)) - expected_sequence_length = [2, 1] - numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,)) + @parameterized.named_parameters( + {'testcase_name': '2D', + 'inputs_args': { + # example 0, ids [2] + # example 1, ids [0, 1] + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (2., 0., 1.), + 'dense_shape': (2, 2)}, + 'expected_sequence_length': [1, 2], + 'shape': (1,)}, + {'testcase_name': '3D', + 'inputs_args': { + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + 'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + 'values': (2., 0., 1., 2.), + 'dense_shape': (2, 2, 2)}, + 'expected_sequence_length': [1, 2], + 'shape': (1,)}, + {'testcase_name': '2D_with_shape', + 'inputs_args': { + # example 0, ids [2] + # example 1, ids [0, 1] + 'indices': ((0, 0), (1, 0), (1, 1)), + 'values': (2., 0., 1.), + 'dense_shape': (2, 2)}, + 'expected_sequence_length': [1, 1], + 'shape': (2,)}, + {'testcase_name': '3D_with_shape', + 'inputs_args': { + # example 0, ids [[2]] + # example 1, ids [[0, 1], [2]] + 'indices': ((0, 0, 0), (1, 0, 0), (1, 0, 1), (1, 1, 0)), + 'values': (2., 0., 1., 2.), + 'dense_shape': (2, 2, 2)}, + 'expected_sequence_length': [1, 2], + 'shape': (2,)}, + ) + def test_sequence_length(self, inputs_args, expected_sequence_length, shape): + inputs = sparse_tensor.SparseTensorValue(**inputs_args) + numeric_column = sfc.sequence_numeric_column('aaa', shape=shape) _, sequence_length = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) + _LazyBuilder({'aaa': inputs})) with 
monitored_session.MonitoredSession() as sess: sequence_length = sess.run(sequence_length) self.assertAllEqual(expected_sequence_length, sequence_length) self.assertEqual(np.int64, sequence_length.dtype) - def test_sequence_length_with_shape(self): - """Tests _sequence_length with shape !=(1,).""" - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, values [[0.], [1]] - # example 1, [[10.]] - indices=((0, 0), (0, 1), (1, 0)), - values=(0., 1., 10.), - dense_shape=(2, 2)) - expected_sequence_length = [2, 1] - numeric_column = sfc.sequence_numeric_column('aaa') - - _, sequence_length = numeric_column._get_sequence_dense_tensor( - _LazyBuilder({'aaa': sparse_input})) - - with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) - def test_sequence_length_with_empty_rows(self): """Tests _sequence_length when some examples do not have ids.""" sparse_input = sparse_tensor.SparseTensorValue( diff --git a/tensorflow/contrib/gan/python/features/python/clip_weights_test.py b/tensorflow/contrib/gan/python/features/python/clip_weights_test.py index 2b7bb5f14e7f3d1b3f913d3426efaaae19079ffb..e4fac1976d605f1942947a747043d5c8b00392c1 100644 --- a/tensorflow/contrib/gan/python/features/python/clip_weights_test.py +++ b/tensorflow/contrib/gan/python/features/python/clip_weights_test.py @@ -47,13 +47,13 @@ class ClipWeightsTest(test.TestCase): train_op1 = opt.minimize(loss, var_list=self.variables) train_op2 = opt_clip.minimize(loss, var_list=self.variables) - with self.test_session(use_gpu=True) as sess: + with self.cached_session(use_gpu=True) as sess: sess.run(variables.global_variables_initializer()) self.assertEqual(2.0, self.variables[0].eval()) sess.run(train_op1) self.assertLess(0.1, self.variables[0].eval()) - with self.test_session(use_gpu=True) as sess: + with self.cached_session(use_gpu=True) as sess: sess.run(variables.global_variables_initializer()) self.assertEqual(2.0, 
self.variables[0].eval()) sess.run(train_op2) diff --git a/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py index 08584dcd656e3e7a079a3fa36f44742b5eac1178..3c9dfd6de024b1558bed2e3678606fef8bb4d677 100644 --- a/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py +++ b/tensorflow/contrib/gan/python/features/python/random_tensor_pool_test.py @@ -37,7 +37,7 @@ class TensorPoolTest(test.TestCase): output_value = tensor_pool(input_value, pool_size=10) self.assertEqual(output_value.shape.as_list(), [None, None, 3]) - with self.test_session(use_gpu=True) as session: + with self.session(use_gpu=True) as session: for i in range(10): session.run(output_value, {input_value: [[[i] * 3]]}) session.run(output_value, {input_value: [[[i] * 3] * 2]}) @@ -49,7 +49,7 @@ class TensorPoolTest(test.TestCase): output_value = tensor_pool(input_value, pool_size=10) self.assertEqual(output_value.shape.as_list(), []) - with self.test_session(use_gpu=True) as session: + with self.session(use_gpu=True) as session: outs = [] for i in range(50): out = session.run(output_value, {input_value: i}) @@ -67,7 +67,7 @@ class TensorPoolTest(test.TestCase): input_value, pool_size=10, pooling_probability=0.0) self.assertEqual(output_value.shape.as_list(), []) - with self.test_session(use_gpu=True) as session: + with self.session(use_gpu=True) as session: for i in range(50): out = session.run(output_value, {input_value: i}) self.assertEqual(out, i) @@ -83,7 +83,7 @@ class TensorPoolTest(test.TestCase): pooling_probability=pooling_probability) self.assertEqual(output_value.shape.as_list(), []) - with self.test_session(use_gpu=True) as session: + with self.session(use_gpu=True) as session: not_pooled = 0 total = 1000 for i in range(total): @@ -104,7 +104,7 @@ class TensorPoolTest(test.TestCase): for output_value in output_values: self.assertEqual(output_value.shape.as_list(), []) - with 
self.test_session(use_gpu=True) as session: + with self.session(use_gpu=True) as session: for i in range(10): outs = session.run(output_values, { input_values[0]: i, diff --git a/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py b/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py index 2fe06a287284ff994326d5a977a2e4d4634268ae..ecfbb8a432e3308863edd6f1343be55c1fe5753c 100644 --- a/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py +++ b/tensorflow/contrib/gan/python/features/python/virtual_batchnorm_test.py @@ -59,7 +59,7 @@ class VirtualBatchnormTest(test.TestCase): mom_mean, mom_var = nn.moments(tensors, axes) vb_var = mean_sq - math_ops.square(vb_mean) - with self.test_session(use_gpu=True) as sess: + with self.cached_session(use_gpu=True) as sess: vb_mean_np, vb_var_np, mom_mean_np, mom_var_np = sess.run([ vb_mean, vb_var, mom_mean, mom_var]) @@ -93,7 +93,7 @@ class VirtualBatchnormTest(test.TestCase): vb_mean = array_ops.squeeze(vb_mean, batch_axis) vb_variance = array_ops.squeeze(vb_variance, batch_axis) - with self.test_session(use_gpu=True) as sess: + with self.cached_session(use_gpu=True) as sess: vb_mean_np, vb_var_np, mom_mean_np, mom_var_np = sess.run([ vb_mean, vb_variance, mom_mean, mom_variance]) @@ -116,7 +116,7 @@ class VirtualBatchnormTest(test.TestCase): vbn = virtual_batchnorm.VBN(batch, axis, batch_axis=batch_axis) vbn_normalized = vbn.reference_batch_normalization() - with self.test_session(use_gpu=True) as sess: + with self.cached_session(use_gpu=True) as sess: variables_lib.global_variables_initializer().run() bn_normalized_np, vbn_normalized_np = sess.run( @@ -142,7 +142,7 @@ class VirtualBatchnormTest(test.TestCase): vb_normed = array_ops.squeeze( vbn(array_ops.expand_dims(examples[i], [0])), [0]) - with self.test_session(use_gpu=True) as sess: + with self.cached_session(use_gpu=True) as sess: variables_lib.global_variables_initializer().run() bn_np, vb_np = 
sess.run([batch_normalized, vb_normed]) self.assertAllClose(bn_np[i, ...], vb_np) @@ -167,7 +167,7 @@ class VirtualBatchnormTest(test.TestCase): vbn = virtual_batchnorm.VBN(reference_batch) vbn_fixed_example = array_ops.squeeze( vbn(array_ops.expand_dims(fixed_example, 0)), 0) - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): variables_lib.global_variables_initializer().run() vbn_fixed_example_np = vbn_fixed_example.eval() @@ -180,7 +180,7 @@ class VirtualBatchnormTest(test.TestCase): minibatch = array_ops.stack([fixed_example] + examples) vbn_minibatch = vbn(minibatch) cur_vbn_fixed_example = vbn_minibatch[0, ...] - with self.test_session(use_gpu=True): + with self.cached_session(use_gpu=True): variables_lib.global_variables_initializer().run() cur_vbn_fixed_example_np = cur_vbn_fixed_example.eval() self.assertAllClose(vbn_fixed_example_np, cur_vbn_fixed_example_np) @@ -219,7 +219,7 @@ class VirtualBatchnormTest(test.TestCase): self.assertEqual(4, len(contrib_variables_lib.get_variables())) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: variables_lib.global_variables_initializer().run() sess.run(to_fetch) diff --git a/tensorflow/contrib/gdr/gdr_memory_manager.cc b/tensorflow/contrib/gdr/gdr_memory_manager.cc index bb06f1c41c1d60f3c3b3639e3b32ea85161510b2..3549cedb70a6104ff3d3829d1b94cb5f08c5119c 100644 --- a/tensorflow/contrib/gdr/gdr_memory_manager.cc +++ b/tensorflow/contrib/gdr/gdr_memory_manager.cc @@ -22,7 +22,6 @@ limitations under the License. #include #include #include -#include #include #include @@ -30,19 +29,17 @@ limitations under the License. 
#include #include "tensorflow/contrib/gdr/gdr.pb.h" -#include "tensorflow/core/common_runtime/bfc_allocator.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/dma_helper.h" -#include "tensorflow/core/common_runtime/pool_allocator.h" #include "tensorflow/core/common_runtime/process_state.h" #if GOOGLE_CUDA #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h" #include "tensorflow/core/common_runtime/gpu/gpu_util.h" #endif // GOOGLE_CUDA -#include "tensorflow/core/framework/allocator_registry.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/numa.h" namespace tensorflow { @@ -70,14 +67,11 @@ bool IsGDRAvailable() { int TryToReadNumaNode(ibv_device* device) { #if defined(__APPLE__) LOG(INFO) << "OS X does not support NUMA - returning NUMA node 0"; - return 0; + return port::kNUMANoAffinity; #elif defined(PLATFORM_WINDOWS) // Windows support for NUMA is not currently implemented. Return node 0. 
- return 0; + return port::kNUMANoAffinity; #else - VLOG(2) << "Trying to read NUMA node for device: " << device->name; - static const int kUnknownNumaNode = -1; - auto filename = string(device->ibdev_path) + "/device/numa_node"; std::ifstream ifs(filename.c_str()); @@ -91,12 +85,12 @@ int TryToReadNumaNode(ibv_device* device) { << value << "), but there must be at least one NUMA node" ", so returning NUMA node zero"; - return 0; + return port::kNUMANoAffinity; } LOG(INFO) << "NUMA node for device: " << device->name << " is " << value; return value; } - return kUnknownNumaNode; + return port::kNUMANoAffinity; #endif } @@ -138,8 +132,6 @@ class GdrMemoryManager : public RemoteMemoryManager { Device* device, DeviceContext* device_context, bool on_host, StatusCallback done) override; - static void RegMemVisitors(); - protected: Status CreateEndpoint(const string& host, const string& port, RdmaEndpointPtr& endpoint); @@ -150,7 +142,8 @@ class GdrMemoryManager : public RemoteMemoryManager { ibv_mr* FindMemoryRegion(void* addr, size_t length); - void InsertMemoryRegion(void* addr, size_t length); + void InsertMemoryRegion(void* addr, size_t length, + const std::string& allocator_name); void EvictMemoryRegion(void* addr, size_t length); @@ -160,6 +153,7 @@ class GdrMemoryManager : public RemoteMemoryManager { RdmaEndpointPtr listening_; std::atomic stopped_; int epfd_; + int numa_node_; // Server side endpoints // Accessed sequentially in Run() so not protected by lock @@ -190,46 +184,10 @@ GdrMemoryManager::GdrMemoryManager(const string& host, const string& port) port_(port), listening_(nullptr, EndpointDeleter), stopped_(true), - next_key_(0) { - static std::once_flag flag; - std::call_once(flag, []() { RegMemVisitors(); }); -} + next_key_(0) {} GdrMemoryManager::~GdrMemoryManager() { close(epfd_); } -/*static*/ void GdrMemoryManager::RegMemVisitors() { - SubAllocator::Visitor alloc_visitor = [](void* ptr, int numa_node, - size_t num_bytes) { - 
GdrMemoryManager::Singleton().InsertMemoryRegion( - ptr, num_bytes, strings::StrCat("CPU:", numa_node)); - }; - SubAllocator::Visitor free_visitor = [](void* ptr, int numa_node, - size_t num_bytes) { - GdrMemoryManager::Singleton().EvictMemoryRegion(ptr, num_bytes); - }; - ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor); - ProcessState::singleton()->AddCPUFreeVisitor(free_visitor); - -#if GOOGLE_CUDA - if (IsGDRAvailable()) { - int32_t bus_id = TryToReadNumaNode(rdma_adapter_->context_->device) + 1; - - // Note we don't free allocated GPU memory so there is no free visitor - SubAllocator::Visitor cuda_alloc_visitor = [](void* ptr, int gpu_id, - size_t num_bytes) { - RdmaMemoryMgr::Singleton().InsertMemoryRegion( - ptr, num_bytes, strings::StrCat("GPU:", gpu_id)); - }; - GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, - cuda_alloc_visitor); - GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id, - alloc_visitor); - GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor); - LOG(INFO) << "Instrumenting GPU allocator with bus_id " << bus_id; - } -#endif // GOOGLE_CUDA -} - Status GdrMemoryManager::Init() { epfd_ = epoll_create1(0); if (epfd_ == -1) { @@ -289,6 +247,42 @@ Status GdrMemoryManager::Init() { "cannot add server to epoll"); } + numa_node_ = TryToReadNumaNode(listening_->verbs->device); + + SubAllocator::Visitor alloc_visitor = [this](void* ptr, int numa_node, + size_t num_bytes) { + VLOG(2) << "Registering RDMA capable memory region on numa_node " + << numa_node; + InsertMemoryRegion(ptr, num_bytes, strings::StrCat("CPU:", numa_node)); + }; + SubAllocator::Visitor free_visitor = [this](void* ptr, int numa_node, + size_t num_bytes) { + VLOG(2) << "De-registering RDMA capable memory region on numa_node " + << numa_node; + EvictMemoryRegion(ptr, num_bytes); + }; + ProcessState::singleton()->AddCPUAllocVisitor(alloc_visitor); + ProcessState::singleton()->AddCPUFreeVisitor(free_visitor); + LOG(INFO) << 
"Instrumenting CPU allocator(s)"; + +#if GOOGLE_CUDA + if (IsGDRAvailable()) { + int bus_id = numa_node_ + 1; + + SubAllocator::Visitor cuda_alloc_visitor = [this](void* ptr, int gpu_id, + size_t num_bytes) { + VLOG(2) << "Registering RDMA capable memory region on GPU " << gpu_id; + InsertMemoryRegion(ptr, num_bytes, strings::StrCat("GPU:", gpu_id)); + }; + GPUProcessState::singleton()->AddGPUAllocVisitor(bus_id, + cuda_alloc_visitor); + GPUProcessState::singleton()->AddCUDAHostAllocVisitor(bus_id, + alloc_visitor); + GPUProcessState::singleton()->AddCUDAHostFreeVisitor(bus_id, free_visitor); + LOG(INFO) << "Instrumenting GPU allocator(s) with bus_id " << bus_id; + } +#endif // GOOGLE_CUDA + return Status::OK(); } @@ -405,7 +399,7 @@ void GdrMemoryManager::TransportOptionsFromTensor( ibv_mr* mr = FindMemoryRegion(addr, length); #if GOOGLE_CUDA - if (!on_host) { + if (device->tensorflow_gpu_device_info() && !on_host) { Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0); Tensor* host_copy = new Tensor(alloc, tensor.dtype(), tensor.shape()); GPUUtil::CopyGPUTensorToCPU( @@ -456,11 +450,27 @@ void GdrMemoryManager::TransportOptionsFromTensor( #endif if (mr == nullptr) { - done(errors::Unavailable("Cannot find pinned memory region")); - return; + Allocator* alloc = ProcessState::singleton()->GetCPUAllocator(numa_node_); + Tensor host_copy(alloc, tensor.dtype(), tensor.shape()); + + std::memcpy(DMAHelper::buffer(&host_copy)->data(), buffer->data(), length); + VLOG(2) << "Copying " << length << " bytes unpinned tensor buffer"; + + buffer = DMAHelper::buffer(&host_copy); + addr = buffer->data(); + length = buffer->size(); + + mr = FindMemoryRegion(addr, length); + if (mr == nullptr) { + done(errors::Unavailable("Cannot find pinned memory region")); + return; + } + + buffer->Ref(); + } else { + buffer->Ref(); } - buffer->Ref(); TensorKey tensor_key = next_key_++; { mutex_lock l(server_mu_); @@ -470,7 +480,7 @@ void 
GdrMemoryManager::TransportOptionsFromTensor( uint64_t checksum = 0; if (VLOG_IS_ON(2)) { #ifdef GOOGLE_CUDA - if (!on_host) { + if (device->tensorflow_gpu_device_info() && !on_host) { checksum = GPUUtil::Checksum(device, device_context, tensor); } else { checksum = GPUUtil::Checksum(tensor); @@ -508,7 +518,8 @@ void GdrMemoryManager::TensorFromTransportOptions( Tensor host_copy; #if GOOGLE_CUDA if (mr == nullptr && !on_host) { - Allocator* alloc = GPUProcessState::singleton()->GetCUDAHostAllocator(0); + Allocator* alloc = + GPUProcessState::singleton()->GetCUDAHostAllocator(numa_node_); host_copy = Tensor(alloc, tensor->dtype(), tensor->shape()); buffer = DMAHelper::buffer(&host_copy); addr = buffer->data(); @@ -518,8 +529,18 @@ void GdrMemoryManager::TensorFromTransportOptions( #endif // GOOGLE_CUDA if (mr == nullptr) { - done(errors::Unavailable("Cannot find pinned memory region")); - return; + Allocator* alloc = ProcessState::singleton()->GetCPUAllocator(numa_node_); + host_copy = Tensor(alloc, tensor->dtype(), tensor->shape()); + + buffer = DMAHelper::buffer(&host_copy); + addr = buffer->data(); + length = buffer->size(); + + mr = FindMemoryRegion(addr, length); + if (mr == nullptr) { + done(errors::Unavailable("Cannot find pinned memory region")); + return; + } } decltype(clients_)::iterator iter; @@ -568,7 +589,8 @@ void GdrMemoryManager::TensorFromTransportOptions( } #if GOOGLE_CUDA - if (host_copy.NumElements() > 0) { + if (device->tensorflow_gpu_device_info() && !on_host && + host_copy.NumElements() > 0) { uint64_t checksum = 0; if (VLOG_IS_ON(2)) { checksum = GPUUtil::Checksum(host_copy); @@ -598,6 +620,12 @@ void GdrMemoryManager::TensorFromTransportOptions( } #endif // GOOGLE_CUDA + if ((on_host || !device->tensorflow_gpu_device_info()) && + host_copy.NumElements() > 0) { + std::memcpy(DMAHelper::buffer(tensor)->data(), addr, length); + VLOG(2) << "Copying " << length << " bytes unpinned tensor buffer"; + } + uint64_t end = Env::Default()->NowMicros(); 
VLOG(2) << "RDMA from remote memory region " << remote_mr.rkey() @@ -607,7 +635,7 @@ void GdrMemoryManager::TensorFromTransportOptions( uint64_t checksum = 0; if (VLOG_IS_ON(2)) { #ifdef GOOGLE_CUDA - if (device->tensorflow_gpu_device_info() && (!on_host)) { + if (device->tensorflow_gpu_device_info() && !on_host) { checksum = GPUUtil::Checksum(device, device_context, *tensor); } else { checksum = GPUUtil::Checksum(*tensor); @@ -668,7 +696,8 @@ ibv_mr* GdrMemoryManager::FindMemoryRegion(void* addr, size_t length) { } } -void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length) { +void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length, + const std::string& allocator_name) { if (length == 0) return; ibv_mr* mr = rdma_reg_read(listening_.get(), addr, length); if (mr != nullptr) { @@ -676,7 +705,8 @@ void GdrMemoryManager::InsertMemoryRegion(void* addr, size_t length) { auto iter = std::upper_bound(mrs_.begin(), mrs_.end(), addr, &Comparator); mrs_.insert(iter, {mr, &MRDeleter}); } else { - LOG(WARNING) << "Cannot register memory region"; + LOG(WARNING) << "Cannot register memory region allocated by " + << allocator_name; } } diff --git a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh index adf027b8e714124cde2b4618546e20c6b7162e1f..69553c3bd15c9359a6ab879bc4e104bd5c30beac 100644 --- a/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh +++ b/tensorflow/contrib/kafka/python/kernel_tests/kafka_test.sh @@ -22,8 +22,12 @@ if [ "$#" -ne 2 ]; then exit 1 fi +action=$1 container=$2 -if [ "$1" == "start" ]; then +if [ "$action" == "start" ]; then + echo pull spotify/kafka + docker pull spotify/kafka + echo pull spotify/kafka successfully docker run -d --rm --net=host --name=$container spotify/kafka echo Wait 5 secs until kafka is up and running sleep 5 @@ -33,12 +37,10 @@ if [ "$1" == "start" ]; then docker exec $container bash -c 'echo -e 
"D0\nD1\nD2\nD3\nD4\nD5\nD6\nD7\nD8\nD9" > /test' echo Produce test message docker exec $container bash -c '/opt/kafka_2.11-0.10.1.0/bin/kafka-console-producer.sh --topic test --broker-list 127.0.0.1:9092 < /test' - echo Container $container started successfully -elif [ "$1" == "stop" ]; then +elif [ "$action" == "stop" ]; then docker rm -f $container - - echo Container $container stopped successfully + echo Container $container removed successfully else echo "Usage: $0 start|stop " >&2 exit 1 diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index c1de42782efb3497660affb3ef7162457977c150..3efceab3375d3a1801c87122c98920cc523a3aca 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -1433,13 +1433,12 @@ class Estimator(BaseEstimator): 'must specify no transforms.') untransformed_tags = graph_rewrite_specs[0].tags - # TODO(soergel): switch to main_op or otherwise update when dust settles builder.add_meta_graph_and_variables( session, untransformed_tags, signature_def_map=signature_def_map, assets_collection=ops.get_collection(ops.GraphKeys.ASSET_FILEPATHS), - legacy_init_op=init_op, + main_op=init_op, strip_default_attrs=strip_default_attrs) # pylint: disable=protected-access diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index b98adf862bf1514b43d237196cb2de531a909479..94ff1dd5b01b5a24d1deb7053553b9df48709c7c 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -22,6 +22,7 @@ import collections from six.moves import range from tensorflow.contrib.linear_optimizer.python.ops.sharded_mutable_dense_hashtable import ShardedMutableDenseHashTable +from tensorflow.python.compat import compat from tensorflow.python.framework 
import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -151,7 +152,8 @@ class SdcaModel(object): default_value=[0.0, 0.0, 0.0, 0.0], # SdcaFprint never returns 0 or 1 for the low64 bits, so this a safe # empty_key (that will never collide with actual payloads). - empty_key=[0, 0]) + empty_key=[0, 0], + deleted_key=[1, 1]) summary.scalar('approximate_duality_gap', self.approximate_duality_gap()) summary.scalar('examples_seen', self._hashtable.size()) @@ -202,7 +204,7 @@ class SdcaModel(object): with ops.colocate_with(v): # TODO(andreasst): remove SDCAOptimizer suffix once bug 30843109 # is fixed. - slot_var = var_ops.Variable( + slot_var = var_ops.VariableV1( initial_value=array_ops.zeros_like(v.initialized_value(), dtypes.float32), name=v.op.name + '_unshrinked/SDCAOptimizer') @@ -214,7 +216,7 @@ class SdcaModel(object): # TODO(andreasst): remove SDCAOptimizer suffix once bug 30843109 is # fixed. self._slots['unshrinked_' + name].append( - var_ops.Variable( + var_ops.VariableV1( array_ops.zeros_like(var.initialized_value(), dtypes.float32), name=var.op.name + '_unshrinked/SDCAOptimizer')) @@ -485,24 +487,44 @@ class SdcaModel(object): sparse_weights.append(batch_gathered_weights) # pylint: disable=protected-access - esu, sfw, dfw = gen_sdca_ops.sdca_optimizer( - sparse_example_indices, - sparse_feature_indices, - sparse_features_values, - self._convert_n_to_tensor(self._examples['dense_features']), - internal_convert_to_tensor(self._examples['example_weights']), - internal_convert_to_tensor(self._examples['example_labels']), - sparse_indices, - sparse_weights, - self._convert_n_to_tensor(self._slots[ - 'unshrinked_dense_features_weights']), - example_state_data, - loss_type=self._options['loss_type'], - l1=self._options['symmetric_l1_regularization'], - l2=self._symmetric_l2_regularization(), - num_loss_partitions=self._num_loss_partitions(), - num_inner_iterations=1, - adaptative=self._adaptive()) + if 
compat.forward_compatible(year=2018, month=10, day=30): + esu, sfw, dfw = gen_sdca_ops.sdca_optimizer_v2( + sparse_example_indices, + sparse_feature_indices, + sparse_features_values, + self._convert_n_to_tensor(self._examples['dense_features']), + internal_convert_to_tensor(self._examples['example_weights']), + internal_convert_to_tensor(self._examples['example_labels']), + sparse_indices, + sparse_weights, + self._convert_n_to_tensor(self._slots[ + 'unshrinked_dense_features_weights']), + example_state_data, + loss_type=self._options['loss_type'], + l1=self._options['symmetric_l1_regularization'], + l2=self._symmetric_l2_regularization(), + num_loss_partitions=self._num_loss_partitions(), + num_inner_iterations=1, + adaptive=self._adaptive()) + else: + esu, sfw, dfw = gen_sdca_ops.sdca_optimizer( + sparse_example_indices, + sparse_feature_indices, + sparse_features_values, + self._convert_n_to_tensor(self._examples['dense_features']), + internal_convert_to_tensor(self._examples['example_weights']), + internal_convert_to_tensor(self._examples['example_labels']), + sparse_indices, + sparse_weights, + self._convert_n_to_tensor(self._slots[ + 'unshrinked_dense_features_weights']), + example_state_data, + loss_type=self._options['loss_type'], + l1=self._options['symmetric_l1_regularization'], + l2=self._symmetric_l2_regularization(), + num_loss_partitions=self._num_loss_partitions(), + num_inner_iterations=1, + adaptative=self._adaptive()) # pylint: enable=protected-access with ops.control_dependencies([esu]): diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py index 5015fb0848107950dd27eb81431dd308f22858bc..44a869f7c2745c594b6a4ea69a2a9e6f1b4f780a 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable.py @@ -48,6 +48,7 
@@ class ShardedMutableDenseHashTable(lookup.LookupInterface): value_dtype, default_value, empty_key, + deleted_key, num_shards=1, checkpoint=True, name='ShardedMutableHashTable'): @@ -62,6 +63,7 @@ class ShardedMutableDenseHashTable(lookup.LookupInterface): value_dtype=value_dtype, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, checkpoint=checkpoint, name='%s-%d-of-%d' % (name, i + 1, num_shards))) self._table_shards = table_shards diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py index 553b116a3b3d76423d4700691fb6912101bebca4..2b56d0fa3a8b8564b7c73a62bd99cc900d6f5c54 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sharded_mutable_dense_hashtable_test.py @@ -33,6 +33,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): with self.cached_session(): default_val = -1 empty_key = 0 + deleted_key = -1 keys = constant_op.constant([11, 12, 13], dtypes.int64) values = constant_op.constant([0, 1, 2], dtypes.int64) table = ShardedMutableDenseHashTable( @@ -40,6 +41,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): dtypes.int64, default_val, empty_key, + deleted_key, num_shards=num_shards) self.assertAllEqual(0, table.size().eval()) @@ -56,6 +58,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): with self.cached_session(): default_val = [-0.1, 0.2] empty_key = [0, 1] + deleted_key = [1, 0] keys = constant_op.constant([[11, 12], [13, 14], [15, 16]], dtypes.int64) values = constant_op.constant([[0.5, 0.6], [1.5, 1.6], [2.5, 2.6]], @@ -65,6 +68,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): dtypes.float32, default_val, empty_key, + deleted_key, num_shards=num_shards) self.assertAllEqual(0, table.size().eval()) @@ -81,6 +85,7 @@ class 
ShardedMutableDenseHashTableTest(TensorFlowTestCase): def testExportSharded(self): with self.cached_session(): empty_key = -2 + deleted_key = -3 default_val = -1 num_shards = 2 keys = constant_op.constant([10, 11, 12], dtypes.int64) @@ -90,6 +95,7 @@ class ShardedMutableDenseHashTableTest(TensorFlowTestCase): dtypes.int64, default_val, empty_key, + deleted_key, num_shards=num_shards) self.assertAllEqual(0, table.size().eval()) diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index 7ef26de69f2699e3d9f55a15737b96a3505cf6eb..e62c192dfcc8d38cd168b0efdc14da74967eb939 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -212,7 +212,8 @@ def json_to_tflite(name, src, out): # This is the master list of generated examples that will be made into tests. A # function called make_XXX_tests() must also appear in generate_examples.py. -# Disable a test by commenting it out. If you do, add a link to a bug or issue. +# Disable a test by adding it to the blacklists specified in +# generated_test_models_failing(). def generated_test_models(): return [ "add", @@ -291,12 +292,31 @@ def generated_test_models(): "tile", "topk", "transpose", - #"transpose_conv", # disabled due to b/111213074 + "transpose_conv", "unpack", "where", "zeros_like", ] +# List of models that fail generated tests for the conversion mode. +# If you have to disable a test, please add here with a link to the appropriate +# bug or issue. +def generated_test_models_failing(conversion_mode): + if not conversion_mode: + return [ + "transpose_conv", # disabled due to b/111213074 + ] + + if conversion_mode == "toco-flex": + # TODO(b/117328698): Fix and enable the known flex failures. 
+ return [ + "lstm", + "split", + "unpack", + ] + + return [] + def generated_test_conversion_modes(): """Returns a list of conversion modes.""" @@ -307,16 +327,28 @@ def generated_test_models_all(): """Generates a list of all tests with the different converters. Returns: - List of tuples representing (conversion mode, name of test). + List of tuples representing: + (conversion mode, name of test, test tags, test args). """ conversion_modes = generated_test_conversion_modes() tests = generated_test_models() options = [] for conversion_mode in conversion_modes: + failing_tests = generated_test_models_failing(conversion_mode) for test in tests: + tags = [] + args = [] + if test in failing_tests: + tags.append("notap") + tags.append("manual") if conversion_mode: test += "_%s" % conversion_mode - options.append((conversion_mode, test)) + + # Flex conversion shouldn't suffer from the same conversion bugs + # listed for the default TFLite kernel backend. + if conversion_mode == "toco-flex": + args.append("--ignore_known_bugs=false") + options.append((conversion_mode, test, tags, args)) return options def gen_zip_test(name, test_name, conversion_mode, **kwargs): @@ -336,9 +368,6 @@ def gen_zip_test(name, test_name, conversion_mode, **kwargs): # if conversion_mode == "pb2lite": # toco = "//tensorflow/contrib/lite/experimental/pb2lite:pb2lite" flags = "--ignore_toco_errors --run_with_flex" - kwargs["tags"].append("skip_already_failing") - kwargs["tags"].append("no_oss") - kwargs["tags"].append("notap") gen_zipped_test_file( name = "zip_%s" % test_name, @@ -392,14 +421,14 @@ def gen_selected_ops(name, model): tools = [tool], ) -def gen_full_model_test(conversion_modes, models, data, test_suite_tag): +def gen_full_model_test(conversion_modes, models, data, tags): """Generates Python test targets for testing TFLite models. Args: conversion_modes: List of conversion modes to test the models on. models: List of models to test. data: List of BUILD targets linking the data. 
- test_suite_tag: Tag identifying the model test suite. + tags: Any additional tags including the test_suite tag. """ options = [ (conversion_mode, model) @@ -422,9 +451,11 @@ def gen_full_model_test(conversion_modes, models, data, test_suite_tag): "no_oss", "no_windows", "notap", - ] + [test_suite_tag], + # TODO(nupurgarg): Remove manual tag when this test is running without the BUILD flag. + "manual", + ] + tags, deps = [ - "//tensorflow/contrib/lite/testing:model_coverage_lib", + "//tensorflow/contrib/lite/testing/model_coverage:model_coverage_lib", "//tensorflow/contrib/lite/python:lite", "//tensorflow/python:client_testlib", ], diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 7809d114e2f72991be98bfa760f1f240864b5aa6..eb26c2dbdbb41ce17cab362dae14ef67f760ce27 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -120,6 +120,8 @@ typedef enum { kTfLiteBuiltinSquare = 92, kTfLiteBuiltinZerosLike = 93, kTfLiteBuiltinFill = 94, + kTfLiteBuiltinFloorMod = 95, + kTfLiteBuiltinRange = 96, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/c/builtin_op_data.h b/tensorflow/contrib/lite/c/builtin_op_data.h index 44daf7adaa0e76300c7199df2a7267e21d340534..1e65c3cee27798990eb9888e67306c6285925a1f 100644 --- a/tensorflow/contrib/lite/c/builtin_op_data.h +++ b/tensorflow/contrib/lite/c/builtin_op_data.h @@ -186,6 +186,13 @@ typedef struct { TfLiteLSTMKernelType kernel_type; } TfLiteLSTMParams; +typedef struct { + // Parameters for the LSTM kernel. + TfLiteFusedActivation activation; + float cell_clip; + float proj_clip; +} TfLiteUnidirectionalSequenceLSTMParams; + typedef struct { // Parameters for the LSTM kernel. 
TfLiteFusedActivation activation; diff --git a/tensorflow/contrib/lite/c/c_api_internal.c b/tensorflow/contrib/lite/c/c_api_internal.c index 8a0c177b1948df9b98e68f6cc6f44628ea8407a3..8be37945ca2a5ddf3c8cedc5a3ae5e34da8a4b9b 100644 --- a/tensorflow/contrib/lite/c/c_api_internal.c +++ b/tensorflow/contrib/lite/c/c_api_internal.c @@ -28,10 +28,15 @@ int TfLiteIntArrayGetSizeInBytes(int size) { int TfLiteIntArrayEqual(TfLiteIntArray* a, TfLiteIntArray* b) { if (a == b) return 1; if (a == NULL || b == NULL) return 0; - if (a->size != b->size) return 0; + return TfLiteIntArrayEqualsArray(a, b->size, b->data); +} + +int TfLiteIntArrayEqualsArray(TfLiteIntArray* a, int b_size, int b_data[]) { + if (a == NULL) return (b_size == 0); + if (a->size != b_size) return 0; int i = 0; for (; i < a->size; i++) - if (a->data[i] != b->data[i]) return 0; + if (a->data[i] != b_data[i]) return 0; return 1; } diff --git a/tensorflow/contrib/lite/c/c_api_internal.h b/tensorflow/contrib/lite/c/c_api_internal.h index ee3dff6792a33a575e75fe7a1ef3dc7985be9c1d..fdc9ff634a19d348ab2dfae60d94722619dfec06 100644 --- a/tensorflow/contrib/lite/c/c_api_internal.h +++ b/tensorflow/contrib/lite/c/c_api_internal.h @@ -88,9 +88,12 @@ int TfLiteIntArrayGetSizeInBytes(int size); // This returns a pointer, that you must free using TfLiteIntArrayFree(). TfLiteIntArray* TfLiteIntArrayCreate(int size); -// Check if two tensors are equal. Returns 1 if they are equal, 0 otherwise. +// Check if two intarrays are equal. Returns 1 if they are equal, 0 otherwise. int TfLiteIntArrayEqual(TfLiteIntArray* a, TfLiteIntArray* b); +// Check if an intarray equals an array. Returns 1 if equals, 0 otherwise. +int TfLiteIntArrayEqualsArray(TfLiteIntArray* a, int b_size, int b_data[]); + // Create a copy of an array passed as `src`. 
// You are expected to free memory with TfLiteIntArrayFree TfLiteIntArray* TfLiteIntArrayCopy(TfLiteIntArray* src); diff --git a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc index eac7db9a88d2adbee9a751957beb5272fbe0e652..348ce54dd73cc5a1bd655b8e4724450b45029b08 100644 --- a/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc +++ b/tensorflow/contrib/lite/core/api/flatbuffer_conversions.cc @@ -371,7 +371,6 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, *builtin_data = reinterpret_cast(params); break; } - case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: case BuiltinOperator_LSTM: { auto params = allocator->AllocatePOD(); if (auto* lstm_params = op->builtin_options_as_LSTMOptions()) { @@ -391,6 +390,20 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, *builtin_data = reinterpret_cast(params); break; } + case BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: { + auto* params = + allocator->AllocatePOD(); + if (auto* seq_lstm_params = + op->builtin_options_as_UnidirectionalSequenceLSTMOptions()) { + params->activation = + parse_activation(seq_lstm_params->fused_activation_function()); + params->cell_clip = seq_lstm_params->cell_clip(); + params->proj_clip = seq_lstm_params->proj_clip(); + } + *builtin_data = reinterpret_cast(params); + break; + } + case BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: { auto params = allocator->AllocatePOD(); @@ -638,6 +651,8 @@ TfLiteStatus ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_SQUARE: case BuiltinOperator_ZEROS_LIKE: case BuiltinOperator_FILL: + case BuiltinOperator_FLOOR_MOD: + case BuiltinOperator_RANGE: break; } return kTfLiteOk; diff --git a/tensorflow/contrib/lite/delegates/flex/BUILD b/tensorflow/contrib/lite/delegates/flex/BUILD index 9b89ed4f849e224d36adae7c3a7581ac542d4f0f..2f866eaecb801695d800565e195f959d55a88201 100644 --- 
a/tensorflow/contrib/lite/delegates/flex/BUILD +++ b/tensorflow/contrib/lite/delegates/flex/BUILD @@ -42,8 +42,31 @@ tf_cc_test( ], ) +# Delegate implementation that pulls in the standard set of TensorFlow ops and +# kernels. cc_library( name = "delegate", + hdrs = [ + "delegate.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":delegate_only_runtime", + ] + select({ + "//tensorflow:android": [ + "//tensorflow/core:android_tensorflow_lib", + ], + "//conditions:default": [ + "//tensorflow/core:tensorflow", + ], + }), + alwayslink = 1, +) + +# Delegate implementation that does *not* pull in the standard set of TensorFlow +# ops and kernels. +cc_library( + name = "delegate_only_runtime", srcs = [ "delegate.cc", ], @@ -134,12 +157,12 @@ cc_library( # set of core TensorFlow kernels. We may want to revisit this dependency # to allow selective registration via build targets. "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib", + "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ + "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:framework", - "//tensorflow/core:tensorflow", ], }), ) @@ -153,7 +176,14 @@ tf_cc_test( ":kernel", ":test_util", "@com_google_googletest//:gtest", - ], + ] + select({ + "//tensorflow:android": [ + "//tensorflow/core:android_tensorflow_lib", + ], + "//conditions:default": [ + "//tensorflow/core:tensorflow", + ], + }), ) cc_library( @@ -180,7 +210,7 @@ cc_library( "//tensorflow/contrib/lite:kernel_api", ] + select({ "//tensorflow:android": [ - "//tensorflow/core:android_tensorflow_lib", + "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ "//tensorflow/core:lib", diff --git a/tensorflow/contrib/lite/experimental/examples/lstm/BUILD b/tensorflow/contrib/lite/experimental/examples/lstm/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..2125f218ca877f94ec9f4d98928b6a1c8f2576eb --- /dev/null +++ 
b/tensorflow/contrib/lite/experimental/examples/lstm/BUILD @@ -0,0 +1,40 @@ +licenses(["notice"]) # Apache 2.0 + +package(default_visibility = ["//tensorflow:internal"]) + +load("//tensorflow:tensorflow.bzl", "py_test") + +py_library( + name = "tflite_lstm", + srcs = ["tflite_lstm.py"], + srcs_version = "PY2AND3", + visibility = ["//visibility:public"], + deps = [ + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/lite/python:lite", + "//tensorflow/python:framework", + "@six_archive//:six", + ], +) + +py_test( + name = "unidirectional_sequence_lstm_test", + size = "large", + srcs = ["unidirectional_sequence_lstm_test.py"], + srcs_version = "PY2AND3", + tags = [ + "no_oss", + "no_pip", + ], + deps = [ + ":tflite_lstm", + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/lite/python:lite", + "//tensorflow/examples/tutorials/mnist:input_data", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform", + "//tensorflow/python/tools:optimize_for_inference", + "//third_party/py/numpy", + "@six_archive//:six", + ], +) diff --git a/tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py b/tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py new file mode 100644 index 0000000000000000000000000000000000000000..2357743266f7082a5a003153718de08c83174ea5 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/examples/lstm/tflite_lstm.py @@ -0,0 +1,396 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TfLite LSTMCell wrapper. + +TODO(renjieliu): Find a better home for this one. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import tensorflow as tf + +from tensorflow.contrib.lite.python import lite +from tensorflow.python.keras import activations +from tensorflow.python.keras import initializers +from tensorflow.python.layers import base as base_layer +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import clip_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import rnn_cell_impl +from tensorflow.python.platform import tf_logging as logging + + +class TFLiteLSTMCell(rnn_cell_impl.LayerRNNCell): + """Long short-term memory unit (LSTM) recurrent network cell. + + This is used only for TfLite, it provides hints and it also makes the + variables in the desired format for the tflite ops (transposed and separated). + + The default non-peephole implementation is based on: + + https://pdfs.semanticscholar.org/1154/0131eae85b2e11d53df7f1360eeb6476e7f4.pdf + + Felix Gers, Jurgen Schmidhuber, and Fred Cummins. + "Learning to forget: Continual prediction with LSTM." IET, 850-855, 1999. + + The peephole implementation is based on: + + https://research.google.com/pubs/archive/43905.pdf + + Hasim Sak, Andrew Senior, and Francoise Beaufays. + "Long short-term memory recurrent neural network architectures for + large scale acoustic modeling." INTERSPEECH, 2014. + + The class uses optional peep-hole connections, optional cell clipping, and + an optional projection layer. + + Note that this cell is not optimized for performance. 
Please use + `tf.contrib.cudnn_rnn.CudnnLSTM` for better performance on GPU, or + `tf.contrib.rnn.LSTMBlockCell` and `tf.contrib.rnn.LSTMBlockFusedCell` for + better performance on CPU. + """ + + def __init__(self, + num_units, + use_peepholes=False, + cell_clip=None, + initializer=None, + num_proj=None, + proj_clip=None, + num_unit_shards=None, + num_proj_shards=None, + forget_bias=1.0, + state_is_tuple=True, + activation=None, + reuse=None, + name=None, + dtype=None): + """Initialize the parameters for an LSTM cell. + + Args: + num_units: int, The number of units in the LSTM cell. + use_peepholes: bool, set True to enable diagonal/peephole connections. + cell_clip: (optional) A float value, if provided the cell state is clipped + by this value prior to the cell output activation. + initializer: (optional) The initializer to use for the weight and + projection matrices. + num_proj: (optional) int, The output dimensionality for the projection + matrices. If None, no projection is performed. + proj_clip: (optional) A float value. If `num_proj > 0` and `proj_clip` is + provided, then the projected values are clipped elementwise to within + `[-proj_clip, proj_clip]`. + num_unit_shards: Deprecated, will be removed by Jan. 2017. Use a + variable_scope partitioner instead. + num_proj_shards: Deprecated, will be removed by Jan. 2017. Use a + variable_scope partitioner instead. + forget_bias: Biases of the forget gate are initialized by default to 1 in + order to reduce the scale of forgetting at the beginning of the + training. Must set it manually to `0.0` when restoring from CudnnLSTM + trained checkpoints. + state_is_tuple: If True, accepted and returned states are 2-tuples of the + `c_state` and `m_state`. If False, they are concatenated along the + column axis. This latter behavior will soon be deprecated. + activation: Activation function of the inner states. Default: `tanh`. 
+ reuse: (optional) Python boolean describing whether to reuse variables in + an existing scope. If not `True`, and the existing scope already has + the given variables, an error is raised. + name: String, the name of the layer. Layers with the same name will share + weights, but to avoid mistakes we require reuse=True in such cases. + dtype: Default dtype of the layer (default of `None` means use the type of + the first input). Required when `build` is called before `call`. When + restoring from CudnnLSTM-trained checkpoints, use + `CudnnCompatibleLSTMCell` instead. + """ + super(TFLiteLSTMCell, self).__init__(_reuse=reuse, name=name, dtype=dtype) + # TODO(raziel): decide if we want to just support tuples (yes please!). + if not state_is_tuple: + logging.warn( + "%s: Using a concatenated state is slower and will soon be " + "deprecated. Use state_is_tuple=True.", self) + if num_unit_shards is not None or num_proj_shards is not None: + logging.warn( + "%s: The num_unit_shards and proj_unit_shards parameters are " + "deprecated and will be removed in Jan 2017. " + "Use a variable scope with a partitioner instead.", self) + + # Inputs must be 2-dimensional. + # TODO(raziel): layers stuff -- chop if un-layerizing Op. 
+ self.input_spec = base_layer.InputSpec(ndim=2) + + self._tflite_wrapper = lite.OpHint("UnidirectionalSequenceLstm") + + self._num_units = num_units + self._use_peepholes = use_peepholes + self._cell_clip = cell_clip + self._initializer = initializer + self._num_proj = num_proj + self._proj_clip = proj_clip + self._num_unit_shards = num_unit_shards + self._num_proj_shards = num_proj_shards + self._forget_bias = forget_bias + self._state_is_tuple = state_is_tuple + self._activation = activation or math_ops.tanh + + self._output_size = num_proj if num_proj else num_units + self._state_size = ( + tf.nn.rnn_cell.LSTMStateTuple(num_units, self._output_size) + if state_is_tuple else num_units + self._output_size) + + @property + def state_size(self): + return self._state_size + + @property + def output_size(self): + return self._output_size + + def build(self, inputs_shape): + """Build TfLite LSTM cell graph. + + Args: + inputs_shape: The inputs_shape must be known, and is [batch_size, + input_size] shape. + + Raises: + ValueError: if the inputs_shape is invalid. 
+ """ + if len(inputs_shape) != 2 or inputs_shape[1].value is None: + raise ValueError("Invalid inputs_shape, saw shape: %s" % inputs_shape) + + input_depth = inputs_shape[1].value + maybe_partitioner = ( + partitioned_variables.fixed_size_partitioner(self._num_unit_shards) + if self._num_unit_shards is not None else None) + input_weight_shape = [self._num_units, input_depth] + cell_weight_shape = [self._num_units, self._output_size] + bias_shape = [self._num_units] + + def add_variable_wrapped(name, shape, initializer, index, partitioner): + var = self.add_variable( + name, shape=shape, initializer=initializer, partitioner=partitioner) + return self._tflite_wrapper.add_input( + var, name="name", index_override=index) + + weight_initializer = self._initializer + if self.dtype is None: + bias_initializer = init_ops.zeros_initializer + else: + bias_initializer = init_ops.zeros_initializer(dtype=self.dtype) + + self.input_to_input_w = add_variable_wrapped( + "input_to_input_w", input_weight_shape, weight_initializer, 1, + maybe_partitioner) + self.input_to_forget_w = add_variable_wrapped( + "input_to_forget_w", input_weight_shape, weight_initializer, 2, + maybe_partitioner) + self.input_to_cell_w = add_variable_wrapped( + "input_to_cell_w", input_weight_shape, weight_initializer, 3, + maybe_partitioner) + self.input_to_output_w = add_variable_wrapped( + "input_to_output_w", input_weight_shape, weight_initializer, 4, + maybe_partitioner) + self.cell_to_input_w = add_variable_wrapped( + "cell_to_input_w", cell_weight_shape, weight_initializer, 5, + maybe_partitioner) + self.cell_to_forget_w = add_variable_wrapped( + "cell_to_forget_w", cell_weight_shape, weight_initializer, 6, + maybe_partitioner) + self.cell_to_cell_w = add_variable_wrapped( + "cell_to_cell_w", cell_weight_shape, weight_initializer, 7, + maybe_partitioner) + self.cell_to_output_w = add_variable_wrapped( + "cell_to_output_w", cell_weight_shape, weight_initializer, 8, + maybe_partitioner) + + 
self.input_bias = add_variable_wrapped( + "input_bias", bias_shape, bias_initializer, 12, maybe_partitioner) + self.forget_bias = add_variable_wrapped( + "forget_bias", bias_shape, bias_initializer, 13, maybe_partitioner) + self.cell_bias = add_variable_wrapped( + "cell_bias", bias_shape, bias_initializer, 14, maybe_partitioner) + self.output_bias = add_variable_wrapped( + "output_bias", bias_shape, bias_initializer, 15, maybe_partitioner) + + # index 9, 10, 11. + # f stands for forget, i stands for input and o stands for output. + if self._use_peepholes: + self._w_f_diag = add_variable_wrapped("w_f_diag", [self._num_units], + self._initializer, 9, + maybe_partitioner) + self._w_i_diag = add_variable_wrapped("w_i_diag", [self._num_units], + self._initializer, 10, + maybe_partitioner) + self._w_o_diag = add_variable_wrapped("w_o_diag", [self._num_units], + self._initializer, 11, + maybe_partitioner) + + # index 16 for proj kernel. + if self._num_proj is not None: + maybe_proj_partitioner = ( + partitioned_variables.fixed_size_partitioner(self._num_proj_shards) + if self._num_proj_shards is not None else None) + self._proj_kernel = add_variable_wrapped( + "projection/kernel", [self._num_proj, self._num_units], + self._initializer, + 16, + partitioner=maybe_proj_partitioner) + + self.built = True + + def call(self, inputs, state): + """Run one step of LSTM. + + Args: + inputs: input Tensor, 2D, `[batch, num_units]`. + state: if `state_is_tuple` is False, this must be a state Tensor, `2-D, + [batch, state_size]`. If `state_is_tuple` is True, this must be a tuple + of state Tensors, both `2-D`, with column sizes `c_state` and `m_state`. + + Returns: + A tuple containing: + + - A `2-D, [batch, output_dim]`, Tensor representing the output of the + LSTM after reading `inputs` when previous state was `state`. + Here output_dim is: + num_proj if num_proj was set, + num_units otherwise. 
+ - Tensor(s) representing the new state of LSTM after reading `inputs` when + the previous state was `state`. Same type and shape(s) as `state`. + + Raises: + ValueError: If input size cannot be inferred from inputs via + static shape inference. + """ + inputs = self._tflite_wrapper.add_input( + inputs, tag="input", name="input", aggregate="stack", index_override=0) + + # Make sure inputs and bias_initializer has the same type. + assert inputs.dtype == self.input_to_input_w.dtype + + num_proj = self._num_units if self._num_proj is None else self._num_proj + sigmoid = math_ops.sigmoid + + if self._state_is_tuple: + (c_prev, m_prev) = state + else: + c_prev = array_ops.slice(state, [0, 0], [-1, self._num_units]) + m_prev = array_ops.slice(state, [0, self._num_units], [-1, num_proj]) + + # Note: For TfLite, cell_state is at index 19 while activation state at + # index 18. + c_prev = self._tflite_wrapper.add_input( + c_prev, + tag="c_prev", + name="c_prev", + aggregate="first", + index_override=19) + m_prev = self._tflite_wrapper.add_input( + m_prev, + tag="m_prev", + name="m_prev", + aggregate="first", + index_override=18) + + input_size = inputs.get_shape().with_rank(2)[1] + if input_size.value is None: + raise ValueError("Could not infer input size from inputs.get_shape()[-1]") + + inputs_and_m_prev = array_ops.concat([inputs, m_prev], axis=1) + + # i stands for input gate. + # f stands for forget gate activation. + # o outputs. + # j output of LSTM unit. + # c is the final state. + # m is the output. 
+ i = nn_ops.bias_add( + tf.matmul( + inputs_and_m_prev, + tf.concat([self.input_to_input_w, self.cell_to_input_w], axis=1), + transpose_b=True), self.input_bias) + f = nn_ops.bias_add( + tf.matmul( + inputs_and_m_prev, + tf.concat([self.input_to_forget_w, self.cell_to_forget_w], axis=1), + transpose_b=True), self.forget_bias) + o = nn_ops.bias_add( + tf.matmul( + inputs_and_m_prev, + tf.concat([self.input_to_output_w, self.cell_to_output_w], axis=1), + transpose_b=True), self.output_bias) + j = nn_ops.bias_add( + tf.matmul( + inputs_and_m_prev, + tf.concat([self.input_to_cell_w, self.cell_to_cell_w], axis=1), + transpose_b=True), self.cell_bias) + + # Diagonal connections + if self._use_peepholes: + c = ( + sigmoid(f + self._forget_bias + self._w_f_diag * c_prev) * c_prev + + sigmoid(i + self._w_i_diag * c_prev) * self._activation(j)) + else: + c = ( + sigmoid(f + self._forget_bias) * c_prev + + sigmoid(i) * self._activation(j)) + + if self._cell_clip is not None: + # pylint: disable=invalid-unary-operand-type + c = clip_ops.clip_by_value(c, -self._cell_clip, self._cell_clip) + # pylint: enable=invalid-unary-operand-type + if self._use_peepholes: + m = sigmoid(o + self._w_o_diag * c) * self._activation(c) + else: + m = sigmoid(o) * self._activation(c) + + if self._num_proj is not None: + transposed_proj_kernel = tf.transpose(self._proj_kernel) + m = math_ops.matmul(m, transposed_proj_kernel) + + if self._proj_clip is not None: + # pylint: disable=invalid-unary-operand-type + m = clip_ops.clip_by_value(m, -self._proj_clip, self._proj_clip) + # pylint: enable=invalid-unary-operand-type + + c = self._tflite_wrapper.add_output( + c, tag="c", name="c", aggregate="last", index_override=1) + m = self._tflite_wrapper.add_output( + m, tag="m", name="m", index_override=2, aggregate="stack") + + new_state = ( + tf.nn.rnn_cell.LSTMStateTuple(c, m) + if self._state_is_tuple else array_ops.concat([c, m], 1)) + return m, new_state + + def get_config(self): + config = { + 
"num_units": self._num_units, + "use_peepholes": self._use_peepholes, + "cell_clip": self._cell_clip, + "initializer": initializers.serialize(self._initializer), + "num_proj": self._num_proj, + "proj_clip": self._proj_clip, + "num_unit_shards": self._num_unit_shards, + "num_proj_shards": self._num_proj_shards, + "forget_bias": self._forget_bias, + "state_is_tuple": self._state_is_tuple, + "activation": activations.serialize(self._activation), + "reuse": self._reuse, + } + base_config = super(TFLiteLSTMCell, self).get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py b/tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py new file mode 100644 index 0000000000000000000000000000000000000000..2ca977518cb11db5f7ed33afa25ead5c02221a95 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/examples/lstm/unidirectional_sequence_lstm_test.py @@ -0,0 +1,226 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import tempfile +import numpy as np +import tensorflow as tf + +from tensorflow.contrib.lite.experimental.examples.lstm.tflite_lstm import TFLiteLSTMCell +from tensorflow.examples.tutorials.mnist import input_data +from tensorflow.python.framework import test_util +from tensorflow.python.platform import test +from tensorflow.python.tools import optimize_for_inference_lib + +# Number of steps to train model. +TRAIN_STEPS = 1 + +CONFIG = tf.ConfigProto(device_count={"GPU": 0}) + + +class UnidirectionalSequenceLstmTest(test_util.TensorFlowTestCase): + + def setUp(self): + tf.reset_default_graph() + # Import MNIST dataset + self.mnist = input_data.read_data_sets("/tmp/data/", one_hot=True) + + # Define constants + # Unrolled through 28 time steps + self.time_steps = 28 + # Rows of 28 pixels + self.n_input = 28 + # Learning rate for Adam optimizer + self.learning_rate = 0.001 + # MNIST is meant to be classified in 10 classes(0-9). + self.n_classes = 10 + # Batch size + self.batch_size = 16 + # Lstm Units. + self.num_units = 64 + + def buildLstmLayer(self): + return tf.nn.rnn_cell.MultiRNNCell([ + TFLiteLSTMCell( + self.num_units, use_peepholes=True, forget_bias=0, name="rnn1"), + TFLiteLSTMCell(self.num_units, num_proj=64, forget_bias=0, name="rnn2"), + TFLiteLSTMCell( + self.num_units // 2, + use_peepholes=True, + num_proj=64, + forget_bias=0, + name="rnn3"), + TFLiteLSTMCell(self.num_units, forget_bias=0, name="rnn4") + ]) + + def buildModel(self, lstm_layer, is_dynamic_rnn, is_train): + # Weights and biases for output softmax layer. 
+ out_weights = tf.Variable( + tf.random_normal([self.num_units, self.n_classes])) + out_bias = tf.Variable(tf.random_normal([self.n_classes])) + + # input image placeholder + x = tf.placeholder( + "float", [None, self.time_steps, self.n_input], name="INPUT_IMAGE") + + # For dynamic_rnn, train with dynamic_rnn and inference with static_rnn. + # x is shaped [batch_size,time_steps,num_inputs] + if is_dynamic_rnn: + if is_train: + lstm_input = x + outputs, _ = tf.nn.dynamic_rnn(lstm_layer, lstm_input, dtype="float32") + outputs = tf.unstack(outputs, axis=1) + else: + lstm_input = tf.unstack(x, self.time_steps, 1) + outputs, _ = tf.nn.static_rnn(lstm_layer, lstm_input, dtype="float32") + else: + lstm_input = tf.unstack(x, self.time_steps, 1) + outputs, _ = tf.nn.static_rnn(lstm_layer, lstm_input, dtype="float32") + + # Compute logits by multiplying outputs[-1] of shape [batch_size,num_units] + # by the softmax layer's out_weight of shape [num_units,n_classes] + # plus out_bias + prediction = tf.matmul(outputs[-1], out_weights) + out_bias + output_class = tf.nn.softmax(prediction, name="OUTPUT_CLASS") + + return x, prediction, output_class + + def trainModel(self, x, prediction, output_class, sess): + # input label placeholder + y = tf.placeholder("float", [None, self.n_classes]) + # Loss function + loss = tf.reduce_mean( + tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y)) + # Optimization + opt = tf.train.AdamOptimizer( + learning_rate=self.learning_rate).minimize(loss) + + # Initialize variables + init = tf.global_variables_initializer() + sess.run(init) + for _ in range(TRAIN_STEPS): + batch_x, batch_y = self.mnist.train.next_batch( + batch_size=self.batch_size, shuffle=False) + + batch_x = batch_x.reshape((self.batch_size, self.time_steps, + self.n_input)) + sess.run(opt, feed_dict={x: batch_x, y: batch_y}) + + def saveAndRestoreModel(self, lstm_layer, sess, saver, is_dynamic_rnn): + model_dir = tempfile.mkdtemp() + saver.save(sess, model_dir) + 
+ # Reset the graph. + tf.reset_default_graph() + x, prediction, output_class = self.buildModel( + lstm_layer, is_dynamic_rnn, is_train=False) + + new_sess = tf.Session(config=CONFIG) + saver = tf.train.Saver() + saver.restore(new_sess, model_dir) + return x, prediction, output_class, new_sess + + def getInferenceResult(self, x, output_class, sess): + b1, _ = self.mnist.train.next_batch(batch_size=1) + sample_input = np.reshape(b1, (1, self.time_steps, self.n_input)) + + expected_output = sess.run(output_class, feed_dict={x: sample_input}) + frozen_graph = tf.graph_util.convert_variables_to_constants( + sess, sess.graph_def, [output_class.op.name]) + return sample_input, expected_output, frozen_graph + + def tfliteInvoke(self, graph, test_inputs, outputs): + tf.reset_default_graph() + # Turn the input into placeholder of shape 1 + tflite_input = tf.placeholder( + "float", [1, self.time_steps, self.n_input], name="INPUT_IMAGE_LITE") + tf.import_graph_def(graph, name="", input_map={"INPUT_IMAGE": tflite_input}) + with tf.Session() as sess: + curr = sess.graph_def + curr = tf.contrib.lite.convert_op_hints_to_stubs(graph_def=curr) + + curr = optimize_for_inference_lib.optimize_for_inference( + curr, ["INPUT_IMAGE_LITE"], ["OUTPUT_CLASS"], + [tf.float32.as_datatype_enum]) + + tflite = tf.contrib.lite.toco_convert( + curr, [tflite_input], [outputs], allow_custom_ops=False) + interpreter = tf.contrib.lite.Interpreter(model_content=tflite) + + try: + interpreter.allocate_tensors() + except ValueError: + assert False + + input_index = (interpreter.get_input_details()[0]["index"]) + interpreter.set_tensor(input_index, test_inputs) + interpreter.invoke() + output_index = (interpreter.get_output_details()[0]["index"]) + result = interpreter.get_tensor(output_index) + # Reset all variables so it will not pollute other inferences. 
+ interpreter.reset_all_variables() + return result + + def testStaticRnnMultiRnnCell(self): + sess = tf.Session(config=CONFIG) + + x, prediction, output_class = self.buildModel( + self.buildLstmLayer(), is_dynamic_rnn=False, is_train=True) + self.trainModel(x, prediction, output_class, sess) + + saver = tf.train.Saver() + x, prediction, output_class, new_sess = self.saveAndRestoreModel( + self.buildLstmLayer(), sess, saver, is_dynamic_rnn=False) + + test_inputs, expected_output, frozen_graph = self.getInferenceResult( + x, output_class, new_sess) + + result = self.tfliteInvoke(frozen_graph, test_inputs, output_class) + self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-3)) + + def testDynamicRnnMultiRnnCell(self): + sess = tf.Session(config=CONFIG) + + x, prediction, output_class = self.buildModel( + self.buildLstmLayer(), is_dynamic_rnn=True, is_train=True) + self.trainModel(x, prediction, output_class, sess) + + # Since we don't yet support OpHints for dynamic, we will load the model + # back in as a static model. This requires the variables to have the same + # names as if they were trained as a static. Thus, we get rid of while/rnn + # names. 
+ variables_to_save = {} + for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES): + op_name = i.name + if op_name.startswith("while/rnn/"): + op_name = op_name.split("while/rnn/")[1] + if op_name.endswith(":0"): + op_name = op_name.split(":0")[0] + variables_to_save[op_name] = i + saver = tf.train.Saver(variables_to_save) + + x, prediction, output_class, new_sess = self.saveAndRestoreModel( + self.buildLstmLayer(), sess, saver, is_dynamic_rnn=True) + + test_inputs, expected_output, frozen_graph = self.getInferenceResult( + x, output_class, new_sess) + + result = self.tfliteInvoke(frozen_graph, test_inputs, output_class) + self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-3)) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD index dad58b6c1cc818d3ae68dd4fdf5ec47315e1b5cc..626f733540264c6fa13ab82557b822690b2d5b8f 100644 --- a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/BUILD @@ -14,8 +14,12 @@ tflite_micro_cc_test( name = "micro_speech_test", srcs = [ "micro_speech_test.cc", + "no_features_data.cc", + "no_features_data.h", "tiny_conv_model_data.cc", "tiny_conv_model_data.h", + "yes_features_data.cc", + "yes_features_data.h", ], tags = [ "nomsan", diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/README.md b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/README.md new file mode 100644 index 0000000000000000000000000000000000000000..438a432356be5c3cc9bfd08de5bd4d6f797c7014 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/README.md @@ -0,0 +1,103 @@ +# Micro Speech Example + +This example shows how you can use TensorFlow Lite to run a 20 kilobyte neural network model to recognize keywords in speech. 
It's designed to run on systems with very small amounts of memory such as microcontrollers and DSPs. The code itself also has a small footprint (for example around 22 kilobytes on a Cortex M3) and only uses about 10 kilobytes of RAM for working memory, so it's able to run on systems like an STM32F103 with only 20 kilobytes of total SRAM and 64 kilobytes of Flash. + +## Table of Contents + + * [Getting Started](#getting-started) + * [Getting Started on a Microcontroller](#getting-started-on-a-microcontroller) + * [Calculating the Input to the Neural Network](#calculating-the-input-to-the-neural-network) + * [Creating Your Own Model](#creating-your-own-model) + +## Getting Started + +To compile and test this example on a desktop Linux or MacOS machine, download [the TensorFlow source code](https://github.com/tensorflow/tensorflow), `cd` into the source directory from a terminal, and then retrieve the support libraries you need by running: + +``` +tensorflow/contrib/lite/experimental/micro/tools/make/download_dependencies.sh +``` + +This will take a few minutes, and downloads frameworks the code uses like [CMSIS](https://developer.arm.com/embedded/cmsis) and [flatbuffers](https://google.github.io/flatbuffers/). Once that process has finished, run: + +``` +make -f tensorflow/contrib/lite/experimental/micro/tools/make/Makefile test_micro_speech +``` + +You should see a series of files get compiled, followed by some logging output from a test, which should conclude with "~~~ALL TESTS PASSED~~~". If you see this, it means that a small program has been built and run that loads a trained TensorFlow model, runs some example inputs through it, and got the expected outputs. This particular test runs spectrograms generated from recordings of people saying "Yes" and "No", and checks that the network correctly identifies them. 
+ +To understand how TensorFlow Lite does this, you can look at the `TestInvoke()` function in [micro_speech_test.cc](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc). It's a fairly small amount of code, creating an interpreter, getting a handle to a model that's been compiled into the program, and then invoking the interpreter with the model and sample inputs. + +## Getting Started on a Microcontroller + +Once you have downloaded the dependencies and got the x86/Linux build working, you can try building a version for the STM32F103 'bluepill' device. The following command will build the test and then run it on an emulator, assuming you have Docker installed: + +``` +make -f tensorflow/contrib/lite/experimental/micro/tools/make/Makefile TARGET=bluepill test_micro_speech +``` + +If you have a real device [(see here for how to set one up)](https://github.com/google/stm32_bare_lib/tree/master/README.md) you can then convert the ELF file into a `.bin` format executable to load onto it by running: + +``` +arm-none-eabi-objcopy \ +tensorflow/contrib/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/micro_speech_test \ +tensorflow/contrib/lite/experimental/micro/tools/make/gen/bluepill_cortex-m3/bin/micro_speech_test.bin \ +--output binary +``` + +## Calculating the Input to the Neural Network + +The TensorFlow Lite model doesn't take in raw audio sample data. Instead it works with spectrograms, which are two dimensional arrays that are made up of slices of frequency information, each taken from a different time window. This test uses spectrograms that have been pre-calculated from one-second WAV files in the test data set. In a complete application these spectrograms would be calculated at runtime from microphone inputs, but the code for doing that is not yet included in this sample code. 
+ +The recipe for creating the spectrogram data is that each frequency slice is created by running an FFT across a 30ms section of the audio sample data. The input samples are treated as being between -1 and +1 as real values (encoded as -32,768 and 32,767 in 16-bit signed integer samples). This results in an FFT with 256 entries. Every sequence of six entries is averaged together, giving a total of 43 frequency buckets in the final slice. The results are stored as unsigned eight-bit values, where 0 represents a real number of zero, and 255 represents 127.5 as a real number. Each adjacent frequency entry is stored in ascending memory order (frequency bucket 0 at data[0], bucket 1 at data[1], etc). The window for the frequency analysis is then moved forward by 20ms, and the process is repeated, storing the results in the next memory row (for example bucket 0 in this moved window would be in data[43 + 0], etc). This process happens 49 times in total, producing a single-channel image that is 43 pixels wide, and 49 rows high. Here's an illustration of the process: + +![spectrogram diagram](https://storage.googleapis.com/download.tensorflow.org/example_images/spectrogram_diagram.png) + + +The test data files have been generated by running the following commands: + +``` +bazel run tensorflow/examples/speech_commands:wav_to_features -- \ +--input_wav=${HOME}/speech_commands_test_set_v0.02/yes/f2e59fea_nohash_1.wav \ +--output_c_file=yes_features_data.cc \ +--window_stride=20 --preprocess=average --quantize=1 + +bazel run tensorflow/examples/speech_commands:wav_to_features -- \ +--input_wav=${HOME}/speech_commands_test_set_v0.02/no/f9643d42_nohash_4.wav \ +--output_c_file=no_features_data.cc \ +--window_stride=20 --preprocess=average --quantize=1 +``` + +## Creating Your Own Model + +The neural network model used in this example was built using the [TensorFlow speech commands tutorial](https://www.tensorflow.org/tutorials/sequences/audio_recognition).
If you would like to create your own, you can start by training a model with this command: + +``` +bazel run -c opt --copt=-mavx2 --copt=-mfma \ +tensorflow/examples/speech_commands:train -- \ +--model_architecture=tiny_conv --window_stride=20 --preprocess=average \ +--wanted_words="yes,no" --silence_percentage=25 --unknown_percentage=25 --quantize=1 +``` + +If you see a compile error on older machines, try leaving out the `--copt` arguments; they are only there to accelerate training on chips that support the extensions. The training process is likely to take a couple of hours. Once it has completed, the next step is to freeze the variables: + +``` +bazel run tensorflow/examples/speech_commands:freeze -- \ +--model_architecture=tiny_conv --window_stride=20 --preprocess=average \ +--wanted_words="yes,no" --quantize=1 --output_file=/tmp/tiny_conv.pb +``` + +The next step is to create a TensorFlow Lite file from the frozen graph: + +``` +bazel run tensorflow/contrib/lite/toco:toco -- \ +--input_file=/tmp/tiny_conv.pb --output_file=/tmp/tiny_conv.tflite \ +--input_shapes=1,49,43,1 --input_arrays=Reshape_1 --output_arrays='labels_softmax' \ +--inference_type=QUANTIZED_UINT8 --mean_values=0 --std_values=2 \ +--change_concat_input_ranges=false +``` + +Finally, convert the file into a C source file that can be compiled into an embedded system: + +``` +xxd -i /tmp/tiny_conv.tflite > /tmp/tiny_conv_model_data.cc +``` diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc index 86cd056a7216aa57126be3f6e660a7dcee0c6c44..0f4731fd4b2a0890bb29d818145f34affde8f304 100644 --- a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations
under the License. ==============================================================================*/ +#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h" #include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h" +#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h" #include "tensorflow/contrib/lite/experimental/micro/kernels/all_ops_resolver.h" #include "tensorflow/contrib/lite/experimental/micro/micro_error_reporter.h" #include "tensorflow/contrib/lite/experimental/micro/micro_interpreter.h" @@ -24,9 +26,12 @@ limitations under the License. TF_LITE_MICRO_TESTS_BEGIN TF_LITE_MICRO_TEST(TestInvoke) { + // Set up logging. tflite::MicroErrorReporter micro_error_reporter; tflite::ErrorReporter* error_reporter = &micro_error_reporter; + // Map the model into a usable data structure. This doesn't involve any + // copying or parsing, it's a very lightweight operation. const tflite::Model* model = ::tflite::GetModel(g_tiny_conv_model_data); if (model->version() != TFLITE_SCHEMA_VERSION) { error_reporter->Report( @@ -34,21 +39,98 @@ TF_LITE_MICRO_TEST(TestInvoke) { "to supported version %d.\n", model->version(), TFLITE_SCHEMA_VERSION); } + + // This pulls in all the operation implementations we need. tflite::ops::micro::AllOpsResolver resolver; + // Create an area of memory to use for input, output, and intermediate arrays. const int tensor_arena_size = 10 * 1024; uint8_t tensor_arena[tensor_arena_size]; tflite::SimpleTensorAllocator tensor_allocator(tensor_arena, tensor_arena_size); + // Build an interpreter to run the model with. tflite::MicroInterpreter interpreter(model, resolver, &tensor_allocator, error_reporter); + + // Get information about the memory area to use for the model's input. + TfLiteTensor* input = interpreter.input(0); + + // Make sure the input has the properties we expect.
+ TF_LITE_MICRO_EXPECT_NE(nullptr, input); + TF_LITE_MICRO_EXPECT_EQ(4, input->dims->size); + TF_LITE_MICRO_EXPECT_EQ(1, input->dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ(49, input->dims->data[1]); + TF_LITE_MICRO_EXPECT_EQ(43, input->dims->data[2]); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, input->type); + + // Copy a spectrogram created from a .wav audio file of someone saying "Yes", + // into the memory area used for the input. + const uint8_t* yes_features_data = g_yes_f2e59fea_nohash_1_data; + for (int i = 0; i < input->bytes; ++i) { + input->data.uint8[i] = yes_features_data[i]; + } + + // Run the model on this input and make sure it succeeds. TfLiteStatus invoke_status = interpreter.Invoke(); if (invoke_status != kTfLiteOk) { error_reporter->Report("Invoke failed\n"); } TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); + // Get the output from the model, and make sure it's the expected size and + // type. + TfLiteTensor* output = interpreter.output(0); + TF_LITE_MICRO_EXPECT_EQ(2, output->dims->size); + TF_LITE_MICRO_EXPECT_EQ(1, output->dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ(4, output->dims->data[1]); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, output->type); + + // There are four possible classes in the output, each with a score. + const int kSilenceIndex = 0; + const int kUnknownIndex = 1; + const int kYesIndex = 2; + const int kNoIndex = 3; + + // Make sure that the expected "Yes" score is higher than the other classes. + uint8_t silence_score = output->data.uint8[kSilenceIndex]; + uint8_t unknown_score = output->data.uint8[kUnknownIndex]; + uint8_t yes_score = output->data.uint8[kYesIndex]; + uint8_t no_score = output->data.uint8[kNoIndex]; + TF_LITE_MICRO_EXPECT_GT(yes_score, silence_score); + TF_LITE_MICRO_EXPECT_GT(yes_score, unknown_score); + TF_LITE_MICRO_EXPECT_GT(yes_score, no_score); + + // Now test with a different input, from a recording of "No". 
+ const uint8_t* no_features_data = g_no_f9643d42_nohash_4_data; + for (int i = 0; i < input->bytes; ++i) { + input->data.uint8[i] = no_features_data[i]; + } + + // Run the model on this "No" input. + invoke_status = interpreter.Invoke(); + if (invoke_status != kTfLiteOk) { + error_reporter->Report("Invoke failed\n"); + } + TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, invoke_status); + + // Get the output from the model, and make sure it's the expected size and + // type. + output = interpreter.output(0); + TF_LITE_MICRO_EXPECT_EQ(2, output->dims->size); + TF_LITE_MICRO_EXPECT_EQ(1, output->dims->data[0]); + TF_LITE_MICRO_EXPECT_EQ(4, output->dims->data[1]); + TF_LITE_MICRO_EXPECT_EQ(kTfLiteUInt8, output->type); + + // Make sure that the expected "No" score is higher than the other classes. + silence_score = output->data.uint8[kSilenceIndex]; + unknown_score = output->data.uint8[kUnknownIndex]; + yes_score = output->data.uint8[kYesIndex]; + no_score = output->data.uint8[kNoIndex]; + TF_LITE_MICRO_EXPECT_GT(no_score, silence_score); + TF_LITE_MICRO_EXPECT_GT(no_score, unknown_score); + TF_LITE_MICRO_EXPECT_GT(no_score, yes_score); + error_reporter->Report("Ran successfully\n"); } diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.cc new file mode 100644 index 0000000000000000000000000000000000000000..3615deb26c4f0ea0b3018a6144e7f2cc58cd8a1e --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.cc @@ -0,0 +1,152 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h" + +/* File automatically created by + * tensorflow/examples/speech_commands/wav_to_features.py \ + * --sample_rate=16000 \ + * --clip_duration_ms=1000 \ + * --window_size_ms=30 \ + * --window_stride_ms=20 \ + * --feature_bin_count=40 \ + * --quantize \ + * --preprocess="average" \ + * --input_wav="speech_commands_test_set_v0.02/no/f9643d42_nohash_4.wav" \ + * --output_c_file="no_features_data.cc" \ + */ + +const int g_no_f9643d42_nohash_4_width = 43; +const int g_no_f9643d42_nohash_4_height = 49; +const unsigned char g_no_f9643d42_nohash_4_data[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 67, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 139, 2, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 195, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 230, 2, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 7, + 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 255, 7, 16, 1, 1, 0, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 7, 22, 0, 1, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 238, 5, 20, 3, 4, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 144, 4, 19, 3, 5, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 6, 3, + 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 3, 1, 5, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 1, 3, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 1, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, +}; diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h new file mode 100644 index 
0000000000000000000000000000000000000000..b53d0a202b75eab7db82107f2c71c504a85f881e --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.h @@ -0,0 +1,23 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_FEATURES_DATA_H_ +#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_FEATURES_DATA_H_ + +extern const int g_no_f9643d42_nohash_4_width; +extern const int g_no_f9643d42_nohash_4_height; +extern const unsigned char g_no_f9643d42_nohash_4_data[]; + +#endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_NO_FEATURES_DATA_H_ diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc index f1f9e0e21994b0a79241690e533e4edc8bfe5565..f0769a1237d64a5f727ec86c5d8ff2e20086436d 100644 --- a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc @@ -15,6 +15,7 @@ limitations under the License. 
// Automatically created from a TensorFlow Lite flatbuffer using the command: // xxd -i tiny_conv.tflite > tiny_conv_model_data.cc +// See the README for a full description of the creation process. #include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.h" @@ -26,1643 +27,1643 @@ const unsigned char g_tiny_conv_model_data[] = { 0x01, 0x00, 0x00, 0x00, 0xf4, 0x47, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x54, 0x4f, 0x43, 0x4f, 0x20, 0x43, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x74, 0x65, 0x64, 0x2e, 0x00, 0x09, 0x00, 0x00, 0x00, 0xd4, 0x47, 0x00, 0x00, - 0x04, 0x03, 0x00, 0x00, 0xfc, 0x02, 0x00, 0x00, 0xf4, 0x02, 0x00, 0x00, - 0x64, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, + 0xb4, 0x47, 0x00, 0x00, 0xe4, 0x02, 0x00, 0x00, 0xb4, 0x02, 0x00, 0x00, + 0xac, 0x02, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb8, 0xb3, 0xff, 0xff, - 0x16, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0xd7, 0x02, 0x00, 0x00, 0x2f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe8, 0xb3, 0xff, 0xff, - 0x46, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, - 0xab, 0x00, 0x00, 0x00, 0x1e, 0xff, 0xff, 0xff, 0xed, 0xff, 0xff, 0xff, - 0x4a, 0x00, 0x00, 0x00, 0x62, 0xb4, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0x80, 0x02, 0x00, 0x00, 0xce, 0xad, 0xaf, 0x3c, 0xc8, 0xe9, 0xb0, 0x83, - 0xa1, 0xbf, 0xb2, 0xb1, 0xab, 0xd0, 0xa7, 0x53, 0xa5, 0xe9, 0xb5, 0xac, - 0xa2, 0xd3, 0xc4, 0x9e, 0x8b, 0xb2, 0x64, 0xb3, 0x9d, 0xa2, 0xae, 0xa6, - 0xd5, 0xbe, 0x43, 0x9f, 0x9c, 0x54, 0xb5, 0xa8, 0x49, 0x78, 0x86, 0xa2, - 0xa3, 0x55, 0x35, 0x96, 0x3d, 0x7f, 0xe2, 0xb5, 0xb0, 0x47, 0x28, 0xa9, - 0x9d, 0xbb, 0xd6, 0xff, 0xb7, 0x79, 0x63, 0xb5, 0xaf, 0xa7, 0xab, 0x7e, - 0xbc, 0xc7, 0xa0, 0xc3, 0xb1, 0xb6, 0xb2, 0xa1, 0xc2, 0xbb, 0x79, 0x57, - 0xbe, 
0xc1, 0xb7, 0xb0, 0x6b, 0xb7, 0xa5, 0x75, 0x97, 0xb8, 0xe7, 0xac, - 0xad, 0x7e, 0xb1, 0x9b, 0xc3, 0xba, 0x6b, 0xa2, 0x7f, 0x58, 0xb9, 0x7a, - 0x4c, 0x91, 0x74, 0x9e, 0xa7, 0x3d, 0xc2, 0x94, 0x75, 0xa1, 0xa4, 0xac, - 0xab, 0x45, 0x2e, 0xb4, 0xb6, 0xbf, 0xc1, 0xdb, 0xaf, 0x6c, 0x67, 0xb1, - 0xa9, 0xa6, 0xa8, 0xca, 0xc2, 0xc4, 0xb9, 0xbf, 0xb4, 0xb9, 0xaa, 0x9d, - 0x9f, 0xb9, 0xb2, 0x71, 0xb2, 0xca, 0xbe, 0xaf, 0x5f, 0xbc, 0xa0, 0x5b, - 0xa8, 0xb4, 0xa4, 0xa8, 0xd8, 0x69, 0xb7, 0x8a, 0xbc, 0xb8, 0xaf, 0x9c, - 0x7c, 0x5d, 0xb3, 0x6b, 0x49, 0x95, 0x64, 0xa0, 0xa2, 0x49, 0xcb, 0x87, - 0xa5, 0xb5, 0xa1, 0xb2, 0xa3, 0x40, 0x6d, 0x9f, 0xc5, 0xb6, 0xbb, 0xd4, - 0x9c, 0x6d, 0x69, 0xa9, 0xa8, 0x91, 0xad, 0xb8, 0xd2, 0xc6, 0xaf, 0xb8, - 0xac, 0xa9, 0xa2, 0xa7, 0x60, 0xa6, 0xa1, 0xc9, 0xb8, 0xd6, 0xcf, 0xb1, - 0x56, 0xb4, 0xac, 0x40, 0xae, 0xbd, 0xbf, 0xa2, 0x54, 0x72, 0x9b, 0x8c, - 0xc2, 0xb5, 0xc2, 0x9b, 0x64, 0x6d, 0xb4, 0x62, 0x4e, 0x9b, 0x6c, 0xa6, - 0x8f, 0x4c, 0xca, 0x95, 0xb6, 0xbf, 0x92, 0xae, 0x9c, 0x49, 0xae, 0xb2, - 0xc0, 0xb6, 0xbc, 0xd1, 0xa4, 0x7b, 0x64, 0xa0, 0xa6, 0x81, 0xac, 0xa6, - 0xbd, 0xc8, 0xbc, 0xae, 0xaa, 0x9e, 0x61, 0xb1, 0x57, 0xac, 0xbf, 0xbf, - 0xbb, 0xe0, 0xa6, 0xae, 0x47, 0xc9, 0xbc, 0x57, 0xb0, 0xb5, 0xc7, 0x98, - 0xf4, 0x93, 0xb6, 0x70, 0xc3, 0xb3, 0xca, 0xab, 0x77, 0x9a, 0xac, 0x45, - 0x5c, 0x9e, 0x9a, 0xa9, 0x9b, 0x35, 0xc0, 0x6f, 0xc6, 0xc7, 0x91, 0xb4, - 0xa8, 0x3c, 0xce, 0xb8, 0xad, 0xb9, 0xb5, 0xdd, 0x9c, 0x6d, 0xbf, 0x91, - 0xb2, 0x7d, 0xa0, 0xaf, 0x9f, 0xbd, 0xb9, 0xcf, 0x9b, 0x5d, 0x3f, 0xac, - 0x64, 0xae, 0xaf, 0xb8, 0xbc, 0xb8, 0x86, 0xb5, 0x36, 0xcf, 0xb4, 0xa9, - 0xad, 0xcd, 0xdb, 0xa4, 0x68, 0xa6, 0xa4, 0x67, 0xc8, 0xb7, 0xe5, 0xa4, - 0x76, 0xb8, 0xa8, 0x28, 0x6b, 0xa5, 0xba, 0xad, 0x9f, 0x3a, 0xa5, 0x42, - 0xc5, 0xb0, 0x88, 0xad, 0xa5, 0x4d, 0xea, 0x8a, 0xb8, 0xb5, 0xb3, 0xd9, - 0xa0, 0x77, 0xbb, 0x92, 0x9e, 0x80, 0xbd, 0xbd, 0x6d, 0xcc, 0xab, 0x99, - 0x88, 0x58, 0x4d, 0xb0, 0x6c, 0xbc, 0x96, 0xbd, 0xae, 0xab, 0x5b, 0xac, - 0x2f, 
0xc3, 0x9a, 0xbe, 0xac, 0xb3, 0x84, 0x9b, 0xe3, 0xaf, 0x95, 0x6b, - 0xc2, 0xb5, 0xca, 0xb7, 0x4e, 0xbc, 0x9d, 0x24, 0x75, 0xa9, 0xd2, 0xae, - 0xa0, 0x2b, 0x90, 0x34, 0xd1, 0xb5, 0x96, 0xae, 0xaa, 0x4d, 0xc1, 0xa3, - 0xb1, 0xb4, 0xaa, 0xd2, 0x9c, 0x7d, 0xc0, 0x91, 0x91, 0x7a, 0xb8, 0x83, - 0x44, 0xcb, 0xaf, 0x9b, 0x6b, 0x5b, 0x75, 0xb2, 0x62, 0xb6, 0xaa, 0xcb, - 0x99, 0xa8, 0x63, 0xae, 0x24, 0xc7, 0x8a, 0xbe, 0xa9, 0xb6, 0xa0, 0xa1, - 0x41, 0xac, 0x84, 0xb5, 0xb9, 0xb3, 0x9b, 0xad, 0x77, 0xbf, 0xa8, 0x7e, - 0x82, 0xb9, 0xbe, 0xaa, 0xa3, 0x47, 0x6d, 0xb5, 0xc3, 0xb1, 0xbf, 0xa7, - 0xb1, 0x57, 0x75, 0xb5, 0xb0, 0xb6, 0xb9, 0xce, 0xa4, 0x86, 0xb0, 0xa4, - 0x98, 0x80, 0xc5, 0x3e, 0x90, 0xca, 0x9b, 0xa2, 0x5a, 0x50, 0xc5, 0xa5, - 0xad, 0xc1, 0x9c, 0x91, 0x83, 0x8f, 0x21, 0xab, 0xac, 0xba, 0x70, 0xb4, - 0xae, 0x85, 0x7e, 0xa7, 0xbd, 0xba, 0x7c, 0xb2, 0xb5, 0xb2, 0x7e, 0xb3, - 0xc3, 0xcd, 0x82, 0xac, 0x9b, 0xb3, 0xa6, 0xb0, 0xbc, 0x6f, 0x52, 0xb9, - 0xbf, 0xb1, 0xa6, 0xa4, 0xc1, 0x7a, 0x90, 0xc0, 0xae, 0xab, 0x94, 0xd8, - 0xab, 0xa4, 0x98, 0xbb, 0x8b, 0x86, 0x94, 0x01, 0xad, 0xe7, 0xb1, 0x9b, - 0x57, 0x48, 0xc1, 0x88, 0xbf, 0xcc, 0xb4, 0x4b, 0x62, 0x8b, 0x48, 0xa7, - 0xbe, 0xe1, 0x80, 0xa6, 0xb3, 0x64, 0xaa, 0xa4, 0xcf, 0xba, 0x6d, 0xa6, - 0xb8, 0xa0, 0x8f, 0xb3, 0xce, 0xc3, 0x87, 0xb2, 0xa0, 0xc0, 0x78, 0xb0, - 0xb9, 0xaa, 0x40, 0xb8, 0xd8, 0xa3, 0x9a, 0xaa, 0xcc, 0xa2, 0x9f, 0xb9, - 0xbe, 0xc2, 0x89, 0xd6, 0xc6, 0x9c, 0xa3, 0xc7, 0x94, 0xb6, 0xff, 0xff, - 0x98, 0xb6, 0xff, 0xff, 0xf6, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, - 0xc0, 0x44, 0x00, 0x00, 0x4a, 0x4d, 0x59, 0x60, 0x5a, 0x45, 0x3d, 0x50, - 0x4a, 0x43, 0x3d, 0x59, 0x3e, 0x49, 0x4a, 0x59, 0x45, 0x44, 0x41, 0x5d, - 0x50, 0x2f, 0x4e, 0x34, 0x46, 0x48, 0x41, 0x4a, 0x4c, 0x3b, 0x4b, 0x3e, - 0x49, 0x49, 0x43, 0x4b, 0x3e, 0x49, 0x47, 0x41, 0x3e, 0x4a, 0x46, 0x43, - 0x41, 0x43, 0x47, 0x49, 0x4a, 0x4c, 0x46, 0x58, 0x3f, 0x4c, 0x4b, 0x4c, - 0x4d, 0x4b, 0x45, 0x52, 0x45, 0x42, 0x52, 0x52, 0x48, 0x40, 0x46, 0x5f, - 0x4c, 
0x41, 0x47, 0x48, 0x48, 0x4c, 0x43, 0x61, 0x50, 0x4b, 0x49, 0x49, - 0x46, 0x3f, 0x40, 0x67, 0x40, 0x4d, 0x45, 0x40, 0x40, 0x45, 0x47, 0x56, - 0x44, 0x3a, 0x4a, 0x4c, 0x52, 0x48, 0x46, 0x50, 0x4b, 0x44, 0x51, 0x45, - 0x40, 0x45, 0x45, 0x48, 0x4e, 0x4e, 0x43, 0x48, 0x44, 0x4b, 0x45, 0x4a, - 0x53, 0x45, 0x4a, 0x4b, 0x3f, 0x43, 0x45, 0x53, 0x4d, 0x43, 0x46, 0x3f, - 0x47, 0x4e, 0x51, 0x50, 0x48, 0x4f, 0x4f, 0x4a, 0x4a, 0x4e, 0x45, 0x4e, - 0x46, 0x41, 0x4a, 0x46, 0x45, 0x47, 0x45, 0x4b, 0x50, 0x4c, 0x46, 0x45, - 0x41, 0x47, 0x41, 0x47, 0x46, 0x4f, 0x3f, 0x4f, 0x4a, 0x51, 0x4f, 0x53, - 0x54, 0x48, 0x51, 0x43, 0x4b, 0x48, 0x4d, 0x46, 0x48, 0x4f, 0x49, 0x44, - 0x43, 0x53, 0x50, 0x59, 0x56, 0x3d, 0x45, 0x44, 0x48, 0x38, 0x3b, 0x5f, - 0x39, 0x43, 0x43, 0x52, 0x46, 0x3e, 0x43, 0x58, 0x43, 0x1e, 0x50, 0x3c, - 0x46, 0x4b, 0x46, 0x50, 0x3c, 0x37, 0x4c, 0x47, 0x47, 0x4b, 0x47, 0x54, - 0x43, 0x3e, 0x47, 0x4f, 0x4b, 0x41, 0x53, 0x50, 0x42, 0x46, 0x4f, 0x4b, - 0x4e, 0x3f, 0x49, 0x52, 0x4a, 0x4a, 0x49, 0x53, 0x52, 0x47, 0x52, 0x5a, - 0x40, 0x42, 0x4d, 0x4b, 0x50, 0x43, 0x49, 0x59, 0x47, 0x4c, 0x4d, 0x50, - 0x4e, 0x3c, 0x44, 0x61, 0x51, 0x49, 0x49, 0x46, 0x49, 0x47, 0x4b, 0x5a, - 0x45, 0x4b, 0x43, 0x40, 0x44, 0x52, 0x4d, 0x54, 0x49, 0x47, 0x44, 0x48, - 0x46, 0x48, 0x3e, 0x40, 0x45, 0x4f, 0x4d, 0x4b, 0x4c, 0x40, 0x3d, 0x40, - 0x3e, 0x48, 0x50, 0x4e, 0x4c, 0x42, 0x48, 0x4b, 0x3d, 0x48, 0x4b, 0x44, - 0x52, 0x4b, 0x49, 0x4f, 0x49, 0x3f, 0x47, 0x43, 0x4d, 0x3f, 0x53, 0x4e, - 0x4a, 0x4f, 0x4e, 0x4e, 0x53, 0x42, 0x46, 0x4c, 0x44, 0x4c, 0x46, 0x51, - 0x45, 0x48, 0x4a, 0x50, 0x47, 0x41, 0x45, 0x54, 0x4a, 0x44, 0x50, 0x49, - 0x48, 0x50, 0x51, 0x4b, 0x50, 0x4c, 0x4a, 0x49, 0x43, 0x47, 0x50, 0x4a, - 0x4d, 0x4c, 0x4e, 0x49, 0x42, 0x50, 0x52, 0x48, 0x45, 0x5a, 0x4e, 0x55, - 0x51, 0x3d, 0x3d, 0x4d, 0x42, 0x32, 0x36, 0x64, 0x39, 0x4c, 0x41, 0x48, - 0x44, 0x35, 0x43, 0x56, 0x47, 0x1e, 0x4b, 0x3e, 0x47, 0x3f, 0x43, 0x52, - 0x51, 0x34, 0x41, 0x4d, 0x3e, 0x41, 0x41, 0x48, 0x3c, 0x4b, 0x45, 0x3b, - 0x40, 
0x43, 0x4c, 0x46, 0x46, 0x47, 0x3e, 0x4f, 0x4b, 0x48, 0x42, 0x47, - 0x4e, 0x3e, 0x49, 0x47, 0x43, 0x43, 0x4e, 0x52, 0x51, 0x45, 0x3f, 0x54, - 0x46, 0x44, 0x48, 0x5d, 0x3e, 0x4a, 0x47, 0x52, 0x53, 0x3a, 0x4f, 0x5d, - 0x41, 0x4c, 0x48, 0x51, 0x43, 0x4b, 0x4b, 0x67, 0x48, 0x4b, 0x45, 0x4d, - 0x4b, 0x43, 0x4a, 0x54, 0x4c, 0x46, 0x43, 0x4a, 0x4d, 0x43, 0x4c, 0x47, - 0x4a, 0x48, 0x4d, 0x42, 0x4d, 0x48, 0x3f, 0x43, 0x4c, 0x44, 0x4e, 0x4c, - 0x40, 0x45, 0x4b, 0x48, 0x47, 0x47, 0x3e, 0x4c, 0x52, 0x41, 0x44, 0x4e, - 0x4d, 0x44, 0x49, 0x4d, 0x3d, 0x45, 0x48, 0x4f, 0x4c, 0x4a, 0x55, 0x51, - 0x4d, 0x4c, 0x45, 0x4e, 0x46, 0x45, 0x44, 0x49, 0x4e, 0x44, 0x40, 0x48, - 0x49, 0x44, 0x53, 0x51, 0x42, 0x41, 0x51, 0x49, 0x51, 0x45, 0x51, 0x3f, - 0x4b, 0x3f, 0x52, 0x3c, 0x50, 0x4d, 0x4f, 0x4b, 0x44, 0x4f, 0x40, 0x52, - 0x49, 0x4a, 0x50, 0x3f, 0x3d, 0x54, 0x4c, 0x53, 0x52, 0x45, 0x41, 0x43, - 0x47, 0x2d, 0x40, 0x63, 0x3a, 0x51, 0x43, 0x4e, 0x40, 0x2b, 0x36, 0x5b, - 0x4b, 0x12, 0x4d, 0x35, 0x4b, 0x3f, 0x44, 0x4a, 0x46, 0x31, 0x54, 0x48, - 0x43, 0x42, 0x3d, 0x51, 0x41, 0x45, 0x49, 0x4b, 0x47, 0x49, 0x3d, 0x3e, - 0x46, 0x3d, 0x4d, 0x48, 0x3d, 0x45, 0x48, 0x4b, 0x49, 0x52, 0x44, 0x4c, - 0x45, 0x44, 0x45, 0x49, 0x50, 0x48, 0x45, 0x46, 0x45, 0x44, 0x52, 0x55, - 0x46, 0x45, 0x4b, 0x3d, 0x42, 0x4a, 0x3e, 0x57, 0x48, 0x4b, 0x3c, 0x42, - 0x4a, 0x46, 0x47, 0x6c, 0x54, 0x4b, 0x41, 0x49, 0x49, 0x50, 0x43, 0x56, - 0x44, 0x43, 0x4d, 0x3e, 0x44, 0x41, 0x47, 0x40, 0x4a, 0x4b, 0x4d, 0x4d, - 0x3e, 0x46, 0x45, 0x47, 0x3e, 0x42, 0x4a, 0x45, 0x49, 0x3d, 0x3f, 0x43, - 0x40, 0x44, 0x47, 0x4a, 0x45, 0x4d, 0x4b, 0x4c, 0x43, 0x40, 0x3d, 0x3e, - 0x4c, 0x4c, 0x42, 0x4d, 0x48, 0x4d, 0x49, 0x42, 0x51, 0x51, 0x4c, 0x4b, - 0x53, 0x4f, 0x48, 0x4d, 0x40, 0x46, 0x45, 0x4b, 0x47, 0x47, 0x4b, 0x46, - 0x54, 0x42, 0x42, 0x46, 0x46, 0x4a, 0x4c, 0x55, 0x3f, 0x3c, 0x52, 0x4b, - 0x4b, 0x4d, 0x4e, 0x48, 0x53, 0x4c, 0x4b, 0x42, 0x52, 0x54, 0x50, 0x4b, - 0x40, 0x5f, 0x58, 0x53, 0x50, 0x42, 0x35, 0x48, 0x39, 0x24, 0x3c, 0x5e, - 0x41, 
0x50, 0x3c, 0x51, 0x42, 0x26, 0x42, 0x56, 0x41, 0x0c, 0x3e, 0x3d, - 0x48, 0x3e, 0x50, 0x4b, 0x3a, 0x2c, 0x43, 0x3d, 0x48, 0x3e, 0x43, 0x48, - 0x4c, 0x3f, 0x4a, 0x3e, 0x51, 0x4a, 0x4f, 0x40, 0x47, 0x43, 0x50, 0x4c, - 0x43, 0x4d, 0x3f, 0x45, 0x4d, 0x3e, 0x4c, 0x44, 0x51, 0x47, 0x4b, 0x51, - 0x45, 0x49, 0x44, 0x3f, 0x46, 0x46, 0x46, 0x57, 0x49, 0x4c, 0x49, 0x4e, - 0x47, 0x4c, 0x47, 0x5e, 0x43, 0x46, 0x45, 0x4b, 0x52, 0x49, 0x45, 0x5f, - 0x47, 0x41, 0x46, 0x43, 0x4f, 0x3b, 0x43, 0x51, 0x46, 0x53, 0x4a, 0x4e, - 0x4b, 0x43, 0x4e, 0x40, 0x48, 0x49, 0x46, 0x3f, 0x48, 0x50, 0x4b, 0x41, - 0x4a, 0x47, 0x4b, 0x3d, 0x46, 0x49, 0x4b, 0x43, 0x43, 0x42, 0x3e, 0x47, - 0x47, 0x4a, 0x45, 0x46, 0x51, 0x48, 0x51, 0x4e, 0x3f, 0x50, 0x44, 0x4b, - 0x4d, 0x4e, 0x44, 0x4d, 0x3d, 0x49, 0x4a, 0x4e, 0x42, 0x51, 0x43, 0x42, - 0x46, 0x3e, 0x48, 0x4b, 0x4f, 0x50, 0x3d, 0x48, 0x4c, 0x4f, 0x46, 0x44, - 0x44, 0x48, 0x42, 0x4b, 0x48, 0x41, 0x43, 0x46, 0x4d, 0x49, 0x4f, 0x43, - 0x41, 0x44, 0x3f, 0x3d, 0x45, 0x4f, 0x45, 0x41, 0x40, 0x58, 0x4f, 0x54, - 0x5b, 0x4b, 0x3a, 0x47, 0x3d, 0x28, 0x3d, 0x57, 0x3e, 0x51, 0x3f, 0x47, - 0x3f, 0x2e, 0x3e, 0x54, 0x4e, 0x0b, 0x41, 0x3d, 0x3b, 0x3d, 0x43, 0x47, - 0x47, 0x28, 0x4d, 0x43, 0x43, 0x3b, 0x4e, 0x4a, 0x4d, 0x42, 0x51, 0x46, - 0x4f, 0x3d, 0x4c, 0x3a, 0x49, 0x49, 0x4a, 0x43, 0x42, 0x4b, 0x47, 0x42, - 0x42, 0x49, 0x3f, 0x4d, 0x46, 0x4a, 0x49, 0x4e, 0x42, 0x3c, 0x4a, 0x41, - 0x4c, 0x40, 0x4d, 0x5a, 0x49, 0x46, 0x51, 0x46, 0x4b, 0x4c, 0x46, 0x62, - 0x45, 0x42, 0x51, 0x4e, 0x4d, 0x3e, 0x4d, 0x5b, 0x4d, 0x43, 0x45, 0x50, - 0x4b, 0x40, 0x50, 0x53, 0x4f, 0x4f, 0x51, 0x53, 0x46, 0x41, 0x4e, 0x3a, - 0x4b, 0x47, 0x3f, 0x3e, 0x4d, 0x48, 0x53, 0x3f, 0x45, 0x42, 0x4c, 0x45, - 0x55, 0x4c, 0x4b, 0x39, 0x4a, 0x45, 0x48, 0x4d, 0x47, 0x40, 0x48, 0x4f, - 0x4d, 0x49, 0x3e, 0x41, 0x46, 0x4e, 0x40, 0x49, 0x4b, 0x47, 0x4c, 0x45, - 0x44, 0x51, 0x4f, 0x4b, 0x48, 0x49, 0x44, 0x41, 0x43, 0x46, 0x51, 0x45, - 0x40, 0x48, 0x4b, 0x42, 0x44, 0x4f, 0x53, 0x4d, 0x44, 0x46, 0x4e, 0x4c, - 0x48, 
0x50, 0x41, 0x45, 0x42, 0x48, 0x4d, 0x4d, 0x47, 0x45, 0x41, 0x45, - 0x48, 0x58, 0x4e, 0x46, 0x43, 0x53, 0x57, 0x52, 0x5e, 0x42, 0x45, 0x4e, - 0x39, 0x24, 0x32, 0x56, 0x47, 0x56, 0x49, 0x52, 0x46, 0x26, 0x3a, 0x51, - 0x4b, 0x05, 0x3e, 0x43, 0x3f, 0x38, 0x4d, 0x4b, 0x4f, 0x27, 0x51, 0x46, - 0x47, 0x41, 0x4a, 0x47, 0x4a, 0x3e, 0x44, 0x51, 0x3f, 0x3a, 0x43, 0x46, - 0x4d, 0x49, 0x46, 0x52, 0x43, 0x48, 0x49, 0x3e, 0x47, 0x46, 0x4a, 0x4d, - 0x47, 0x46, 0x52, 0x50, 0x44, 0x48, 0x4c, 0x47, 0x45, 0x41, 0x49, 0x5b, - 0x4d, 0x4b, 0x47, 0x4c, 0x4a, 0x47, 0x45, 0x5b, 0x49, 0x46, 0x52, 0x47, - 0x47, 0x3d, 0x55, 0x59, 0x40, 0x4b, 0x3e, 0x50, 0x42, 0x43, 0x40, 0x4f, - 0x48, 0x3f, 0x47, 0x53, 0x4d, 0x44, 0x4e, 0x37, 0x4c, 0x43, 0x51, 0x4d, - 0x46, 0x4e, 0x40, 0x41, 0x52, 0x44, 0x43, 0x4a, 0x50, 0x48, 0x47, 0x42, - 0x48, 0x45, 0x50, 0x4d, 0x42, 0x52, 0x44, 0x43, 0x45, 0x43, 0x4c, 0x4d, - 0x44, 0x51, 0x47, 0x48, 0x51, 0x4f, 0x48, 0x45, 0x49, 0x4a, 0x3e, 0x43, - 0x4d, 0x4e, 0x4e, 0x46, 0x54, 0x4d, 0x49, 0x4d, 0x47, 0x46, 0x4b, 0x41, - 0x4a, 0x49, 0x44, 0x45, 0x4d, 0x3e, 0x53, 0x50, 0x47, 0x4d, 0x4e, 0x43, - 0x4f, 0x45, 0x4e, 0x4a, 0x47, 0x49, 0x4c, 0x4c, 0x4d, 0x54, 0x42, 0x4c, - 0x43, 0x5d, 0x59, 0x50, 0x5e, 0x4b, 0x44, 0x43, 0x3c, 0x25, 0x31, 0x5b, - 0x46, 0x5a, 0x50, 0x4d, 0x41, 0x2a, 0x41, 0x4f, 0x44, 0x00, 0x41, 0x3d, - 0x43, 0x4b, 0x47, 0x45, 0x4e, 0x2e, 0x44, 0x46, 0x53, 0x3d, 0x43, 0x41, - 0x44, 0x46, 0x49, 0x42, 0x45, 0x4f, 0x4d, 0x3a, 0x43, 0x3c, 0x47, 0x53, - 0x43, 0x4e, 0x3f, 0x41, 0x4d, 0x50, 0x4b, 0x4c, 0x51, 0x47, 0x53, 0x4f, - 0x45, 0x4a, 0x44, 0x45, 0x41, 0x46, 0x47, 0x50, 0x51, 0x3f, 0x3e, 0x41, - 0x48, 0x45, 0x46, 0x5d, 0x45, 0x4a, 0x4c, 0x46, 0x4a, 0x49, 0x50, 0x51, - 0x51, 0x4c, 0x4f, 0x47, 0x47, 0x42, 0x45, 0x47, 0x4e, 0x48, 0x46, 0x40, - 0x45, 0x46, 0x4d, 0x3b, 0x4d, 0x52, 0x4c, 0x51, 0x49, 0x51, 0x47, 0x3d, - 0x4d, 0x42, 0x4f, 0x4e, 0x43, 0x43, 0x45, 0x3a, 0x42, 0x50, 0x4c, 0x4a, - 0x41, 0x53, 0x4c, 0x45, 0x51, 0x3f, 0x54, 0x43, 0x4b, 0x54, 0x56, 0x4d, - 0x4f, 
0x4a, 0x50, 0x4b, 0x44, 0x45, 0x4f, 0x4f, 0x47, 0x3e, 0x50, 0x4f, - 0x4b, 0x48, 0x4d, 0x49, 0x55, 0x4d, 0x45, 0x4d, 0x4a, 0x53, 0x43, 0x46, - 0x4c, 0x45, 0x41, 0x46, 0x49, 0x49, 0x4f, 0x4b, 0x49, 0x50, 0x52, 0x49, - 0x41, 0x54, 0x44, 0x4c, 0x44, 0x63, 0x4a, 0x49, 0x40, 0x59, 0x52, 0x52, - 0x59, 0x3f, 0x3e, 0x3e, 0x40, 0x25, 0x3c, 0x5c, 0x4f, 0x57, 0x44, 0x50, - 0x41, 0x2a, 0x48, 0x4f, 0x43, 0x08, 0x47, 0x43, 0x49, 0x48, 0x4d, 0x49, - 0x46, 0x2b, 0x48, 0x44, 0x4e, 0x47, 0x47, 0x43, 0x44, 0x3e, 0x4a, 0x52, - 0x3f, 0x4a, 0x53, 0x42, 0x49, 0x47, 0x4c, 0x50, 0x43, 0x46, 0x46, 0x3c, - 0x4c, 0x47, 0x4e, 0x4d, 0x42, 0x41, 0x53, 0x52, 0x4f, 0x40, 0x54, 0x50, - 0x46, 0x43, 0x50, 0x56, 0x51, 0x48, 0x48, 0x48, 0x49, 0x39, 0x47, 0x5e, - 0x4e, 0x4b, 0x4f, 0x4e, 0x43, 0x45, 0x42, 0x58, 0x4a, 0x3b, 0x48, 0x4d, - 0x43, 0x3e, 0x4b, 0x43, 0x3c, 0x45, 0x46, 0x4b, 0x42, 0x42, 0x4e, 0x3d, - 0x4b, 0x4e, 0x51, 0x52, 0x48, 0x3e, 0x4b, 0x3f, 0x4c, 0x4a, 0x4b, 0x4c, - 0x46, 0x48, 0x3e, 0x48, 0x47, 0x4d, 0x4a, 0x46, 0x49, 0x4d, 0x4a, 0x48, - 0x50, 0x4b, 0x40, 0x48, 0x4b, 0x52, 0x46, 0x50, 0x4f, 0x3e, 0x42, 0x44, - 0x44, 0x42, 0x43, 0x49, 0x4f, 0x4f, 0x46, 0x42, 0x4a, 0x54, 0x42, 0x48, - 0x50, 0x4f, 0x4f, 0x4c, 0x4c, 0x47, 0x52, 0x49, 0x4c, 0x45, 0x4a, 0x4d, - 0x4a, 0x41, 0x47, 0x4a, 0x4d, 0x4a, 0x4c, 0x46, 0x51, 0x44, 0x4b, 0x49, - 0x53, 0x5e, 0x45, 0x4a, 0x3b, 0x57, 0x5a, 0x4c, 0x59, 0x43, 0x3e, 0x4a, - 0x3e, 0x20, 0x36, 0x5d, 0x47, 0x5b, 0x3f, 0x55, 0x3e, 0x24, 0x41, 0x52, - 0x3f, 0x01, 0x49, 0x41, 0x40, 0x45, 0x42, 0x46, 0x49, 0x2a, 0x47, 0x40, - 0x44, 0x3f, 0x42, 0x47, 0x4e, 0x42, 0x4b, 0x3d, 0x45, 0x4c, 0x47, 0x3d, - 0x4c, 0x44, 0x48, 0x43, 0x43, 0x41, 0x4a, 0x3d, 0x48, 0x4b, 0x46, 0x4e, - 0x4c, 0x45, 0x48, 0x4d, 0x54, 0x4d, 0x3e, 0x46, 0x3e, 0x47, 0x44, 0x4e, - 0x48, 0x49, 0x53, 0x4b, 0x41, 0x45, 0x4c, 0x57, 0x52, 0x4e, 0x40, 0x48, - 0x4d, 0x43, 0x44, 0x5a, 0x4a, 0x4c, 0x48, 0x4d, 0x3f, 0x52, 0x41, 0x50, - 0x4a, 0x47, 0x3e, 0x43, 0x4c, 0x42, 0x48, 0x3e, 0x4f, 0x4b, 0x41, 0x43, - 0x49, 
0x40, 0x43, 0x36, 0x3f, 0x4b, 0x49, 0x49, 0x51, 0x43, 0x48, 0x40, - 0x4c, 0x51, 0x4d, 0x4a, 0x49, 0x3f, 0x4b, 0x3d, 0x4f, 0x4b, 0x43, 0x4d, - 0x46, 0x40, 0x46, 0x4d, 0x49, 0x48, 0x4d, 0x4c, 0x52, 0x4c, 0x49, 0x4f, - 0x53, 0x40, 0x49, 0x53, 0x47, 0x43, 0x4c, 0x45, 0x42, 0x48, 0x42, 0x4e, - 0x49, 0x43, 0x42, 0x40, 0x4f, 0x46, 0x50, 0x47, 0x51, 0x4a, 0x52, 0x45, - 0x4c, 0x51, 0x48, 0x47, 0x40, 0x41, 0x52, 0x4f, 0x41, 0x5a, 0x53, 0x47, - 0x42, 0x5f, 0x55, 0x4f, 0x53, 0x3e, 0x41, 0x49, 0x3d, 0x20, 0x3f, 0x54, - 0x42, 0x5b, 0x49, 0x4d, 0x3d, 0x22, 0x3e, 0x48, 0x41, 0x01, 0x4c, 0x3d, - 0x43, 0x4a, 0x46, 0x43, 0x4f, 0x2b, 0x49, 0x46, 0x47, 0x4a, 0x51, 0x3d, - 0x4b, 0x44, 0x49, 0x41, 0x47, 0x47, 0x45, 0x3a, 0x44, 0x42, 0x40, 0x52, - 0x46, 0x51, 0x4a, 0x41, 0x4a, 0x52, 0x44, 0x52, 0x4a, 0x40, 0x46, 0x45, - 0x52, 0x4c, 0x4e, 0x42, 0x42, 0x48, 0x40, 0x4f, 0x4b, 0x4f, 0x51, 0x4c, - 0x4e, 0x48, 0x4a, 0x5a, 0x46, 0x3d, 0x41, 0x50, 0x52, 0x4c, 0x44, 0x53, - 0x4b, 0x4d, 0x4f, 0x49, 0x47, 0x4c, 0x48, 0x45, 0x48, 0x4a, 0x44, 0x4e, - 0x4c, 0x40, 0x4d, 0x35, 0x40, 0x49, 0x4a, 0x51, 0x49, 0x4a, 0x46, 0x36, - 0x46, 0x47, 0x4a, 0x4c, 0x40, 0x4e, 0x42, 0x38, 0x48, 0x45, 0x42, 0x49, - 0x54, 0x4c, 0x3f, 0x49, 0x4c, 0x39, 0x47, 0x45, 0x4e, 0x4a, 0x42, 0x44, - 0x4b, 0x53, 0x43, 0x40, 0x46, 0x51, 0x3d, 0x50, 0x4b, 0x43, 0x4a, 0x4c, - 0x55, 0x54, 0x4a, 0x43, 0x48, 0x40, 0x44, 0x3f, 0x47, 0x45, 0x3e, 0x41, - 0x49, 0x44, 0x4d, 0x49, 0x44, 0x41, 0x4a, 0x50, 0x44, 0x49, 0x4d, 0x47, - 0x4a, 0x49, 0x46, 0x49, 0x40, 0x5b, 0x4d, 0x51, 0x47, 0x57, 0x49, 0x4f, - 0x56, 0x46, 0x3a, 0x4a, 0x3e, 0x22, 0x36, 0x5c, 0x44, 0x56, 0x46, 0x48, - 0x3a, 0x2d, 0x4a, 0x48, 0x44, 0x17, 0x41, 0x42, 0x40, 0x3d, 0x4e, 0x45, - 0x40, 0x26, 0x43, 0x52, 0x41, 0x40, 0x44, 0x4a, 0x48, 0x42, 0x4f, 0x47, - 0x46, 0x4c, 0x4a, 0x3b, 0x42, 0x3e, 0x3e, 0x49, 0x4e, 0x44, 0x4e, 0x49, - 0x47, 0x41, 0x47, 0x44, 0x4c, 0x45, 0x4d, 0x49, 0x49, 0x48, 0x55, 0x3d, - 0x4a, 0x45, 0x50, 0x4f, 0x46, 0x4c, 0x46, 0x45, 0x3c, 0x51, 0x4b, 0x5a, - 0x46, 
0x47, 0x54, 0x41, 0x44, 0x40, 0x4f, 0x53, 0x49, 0x46, 0x46, 0x48, - 0x44, 0x40, 0x50, 0x49, 0x49, 0x43, 0x50, 0x41, 0x52, 0x4b, 0x46, 0x3e, - 0x44, 0x44, 0x46, 0x4e, 0x47, 0x48, 0x3e, 0x38, 0x4c, 0x4c, 0x48, 0x43, - 0x48, 0x3e, 0x50, 0x42, 0x51, 0x50, 0x4a, 0x48, 0x4a, 0x42, 0x44, 0x3d, - 0x4a, 0x46, 0x46, 0x3d, 0x4e, 0x47, 0x3d, 0x48, 0x4c, 0x46, 0x50, 0x4d, - 0x49, 0x45, 0x4a, 0x4c, 0x4c, 0x47, 0x4a, 0x42, 0x4a, 0x45, 0x50, 0x52, - 0x4b, 0x4d, 0x4c, 0x43, 0x42, 0x53, 0x41, 0x45, 0x49, 0x41, 0x4b, 0x4c, - 0x52, 0x54, 0x4b, 0x41, 0x48, 0x4c, 0x47, 0x4c, 0x41, 0x49, 0x4a, 0x47, - 0x50, 0x59, 0x4e, 0x45, 0x3c, 0x5d, 0x53, 0x4c, 0x5a, 0x3e, 0x3a, 0x51, - 0x3a, 0x22, 0x35, 0x59, 0x40, 0x5a, 0x43, 0x46, 0x41, 0x32, 0x44, 0x4b, - 0x47, 0x04, 0x4c, 0x3a, 0x4a, 0x49, 0x48, 0x3d, 0x45, 0x2b, 0x50, 0x41, - 0x3e, 0x44, 0x4f, 0x43, 0x4a, 0x3f, 0x48, 0x4b, 0x53, 0x49, 0x4b, 0x38, - 0x44, 0x40, 0x48, 0x4c, 0x41, 0x3f, 0x47, 0x3e, 0x47, 0x49, 0x45, 0x42, - 0x43, 0x3e, 0x46, 0x44, 0x53, 0x4d, 0x48, 0x44, 0x45, 0x42, 0x43, 0x53, - 0x55, 0x49, 0x4d, 0x4b, 0x45, 0x44, 0x47, 0x5f, 0x48, 0x44, 0x4a, 0x48, - 0x45, 0x4d, 0x4f, 0x5e, 0x4e, 0x46, 0x49, 0x49, 0x4d, 0x49, 0x44, 0x48, - 0x4d, 0x41, 0x50, 0x48, 0x3d, 0x3f, 0x4d, 0x38, 0x46, 0x4a, 0x50, 0x4a, - 0x45, 0x3e, 0x43, 0x36, 0x42, 0x48, 0x53, 0x54, 0x49, 0x43, 0x4b, 0x3a, - 0x45, 0x48, 0x50, 0x45, 0x4a, 0x4c, 0x4a, 0x4d, 0x43, 0x4c, 0x55, 0x4e, - 0x4c, 0x42, 0x45, 0x52, 0x52, 0x45, 0x46, 0x40, 0x54, 0x4c, 0x3d, 0x4e, - 0x49, 0x4e, 0x44, 0x47, 0x45, 0x48, 0x4b, 0x50, 0x49, 0x4b, 0x44, 0x4b, - 0x4f, 0x49, 0x47, 0x47, 0x53, 0x3f, 0x4b, 0x42, 0x45, 0x3e, 0x4d, 0x4d, - 0x48, 0x51, 0x45, 0x40, 0x43, 0x43, 0x4e, 0x44, 0x51, 0x55, 0x4a, 0x3e, - 0x45, 0x55, 0x58, 0x50, 0x50, 0x38, 0x44, 0x4f, 0x3b, 0x23, 0x3c, 0x55, - 0x3c, 0x54, 0x49, 0x42, 0x44, 0x2f, 0x3e, 0x47, 0x42, 0x01, 0x42, 0x37, - 0x3f, 0x42, 0x45, 0x45, 0x47, 0x2a, 0x52, 0x4b, 0x45, 0x3c, 0x47, 0x44, - 0x44, 0x40, 0x50, 0x53, 0x48, 0x42, 0x4d, 0x36, 0x50, 0x3d, 0x49, 0x44, - 0x4f, 
0x4c, 0x4a, 0x42, 0x4d, 0x3e, 0x3d, 0x3f, 0x4e, 0x44, 0x4d, 0x4e, - 0x54, 0x3d, 0x42, 0x46, 0x49, 0x47, 0x4b, 0x53, 0x45, 0x46, 0x47, 0x4a, - 0x45, 0x3d, 0x4a, 0x5f, 0x51, 0x3e, 0x45, 0x45, 0x44, 0x3a, 0x4d, 0x57, - 0x45, 0x47, 0x4d, 0x45, 0x4e, 0x4b, 0x51, 0x48, 0x4b, 0x4a, 0x3c, 0x4e, - 0x51, 0x41, 0x4d, 0x36, 0x47, 0x4a, 0x46, 0x51, 0x4e, 0x4c, 0x52, 0x41, - 0x55, 0x47, 0x41, 0x47, 0x4d, 0x47, 0x4b, 0x3d, 0x4a, 0x4a, 0x46, 0x49, - 0x4d, 0x48, 0x46, 0x46, 0x4d, 0x52, 0x52, 0x48, 0x49, 0x3f, 0x4b, 0x4e, - 0x4c, 0x49, 0x45, 0x47, 0x41, 0x4b, 0x44, 0x48, 0x52, 0x4b, 0x53, 0x44, - 0x46, 0x4e, 0x44, 0x49, 0x52, 0x50, 0x46, 0x4b, 0x44, 0x43, 0x50, 0x49, - 0x4a, 0x53, 0x45, 0x49, 0x52, 0x3f, 0x4a, 0x4e, 0x49, 0x4c, 0x4d, 0x4d, - 0x40, 0x40, 0x3f, 0x4a, 0x47, 0x56, 0x51, 0x43, 0x40, 0x5a, 0x58, 0x52, - 0x4f, 0x3d, 0x3d, 0x45, 0x38, 0x29, 0x33, 0x59, 0x45, 0x54, 0x3c, 0x42, - 0x3f, 0x27, 0x3e, 0x49, 0x48, 0x06, 0x4a, 0x3f, 0x41, 0x49, 0x4c, 0x48, - 0x46, 0x2b, 0x4a, 0x4f, 0x44, 0x46, 0x4c, 0x46, 0x4a, 0x3b, 0x4d, 0x4a, - 0x40, 0x41, 0x45, 0x38, 0x51, 0x39, 0x46, 0x46, 0x41, 0x51, 0x4e, 0x41, - 0x49, 0x44, 0x48, 0x4a, 0x4b, 0x46, 0x47, 0x46, 0x4a, 0x4c, 0x47, 0x48, - 0x3d, 0x42, 0x50, 0x4f, 0x50, 0x4a, 0x4a, 0x48, 0x4a, 0x45, 0x45, 0x61, - 0x4a, 0x4c, 0x49, 0x3d, 0x4b, 0x4a, 0x4a, 0x5a, 0x48, 0x49, 0x50, 0x4f, - 0x42, 0x48, 0x3e, 0x44, 0x43, 0x3b, 0x4f, 0x54, 0x4b, 0x4a, 0x47, 0x31, - 0x4a, 0x49, 0x47, 0x4e, 0x48, 0x48, 0x46, 0x42, 0x4a, 0x45, 0x4c, 0x49, - 0x4b, 0x4e, 0x53, 0x43, 0x4c, 0x49, 0x4f, 0x4b, 0x46, 0x4c, 0x4b, 0x4e, - 0x51, 0x4b, 0x49, 0x52, 0x44, 0x55, 0x45, 0x49, 0x4b, 0x4a, 0x50, 0x4c, - 0x4d, 0x4a, 0x4b, 0x48, 0x41, 0x46, 0x47, 0x43, 0x4b, 0x3f, 0x54, 0x4a, - 0x46, 0x49, 0x51, 0x48, 0x4e, 0x4a, 0x41, 0x52, 0x52, 0x4e, 0x53, 0x47, - 0x42, 0x48, 0x43, 0x44, 0x54, 0x51, 0x40, 0x49, 0x4c, 0x48, 0x49, 0x44, - 0x4c, 0x56, 0x52, 0x49, 0x3d, 0x59, 0x4f, 0x56, 0x56, 0x42, 0x46, 0x45, - 0x3e, 0x28, 0x3f, 0x5b, 0x3f, 0x5a, 0x4c, 0x42, 0x44, 0x22, 0x3f, 0x46, - 0x47, 
0x0d, 0x3e, 0x41, 0x45, 0x49, 0x4a, 0x3b, 0x45, 0x2d, 0x4d, 0x4a, - 0x44, 0x43, 0x49, 0x46, 0x4b, 0x47, 0x49, 0x45, 0x4e, 0x40, 0x4c, 0x3c, - 0x42, 0x3e, 0x4b, 0x50, 0x48, 0x49, 0x4c, 0x42, 0x3c, 0x43, 0x50, 0x43, - 0x49, 0x4e, 0x4e, 0x43, 0x46, 0x4c, 0x48, 0x4a, 0x43, 0x4c, 0x49, 0x4e, - 0x47, 0x44, 0x50, 0x4c, 0x4a, 0x48, 0x47, 0x5f, 0x3f, 0x3e, 0x48, 0x4f, - 0x4f, 0x49, 0x4a, 0x5f, 0x4e, 0x40, 0x4e, 0x48, 0x47, 0x44, 0x40, 0x4d, - 0x3f, 0x4a, 0x53, 0x45, 0x3e, 0x50, 0x3f, 0x39, 0x50, 0x45, 0x45, 0x4b, - 0x43, 0x41, 0x46, 0x41, 0x49, 0x47, 0x4b, 0x41, 0x3c, 0x4b, 0x46, 0x3f, - 0x41, 0x4a, 0x4e, 0x4c, 0x49, 0x4c, 0x3f, 0x44, 0x53, 0x4c, 0x45, 0x49, - 0x48, 0x4d, 0x48, 0x4a, 0x48, 0x4f, 0x45, 0x4d, 0x48, 0x4c, 0x41, 0x49, - 0x42, 0x48, 0x53, 0x46, 0x4a, 0x46, 0x4b, 0x4f, 0x4c, 0x52, 0x4c, 0x51, - 0x41, 0x4d, 0x49, 0x41, 0x49, 0x4f, 0x49, 0x42, 0x4a, 0x48, 0x51, 0x4a, - 0x44, 0x4d, 0x55, 0x48, 0x47, 0x4d, 0x4d, 0x45, 0x42, 0x60, 0x4a, 0x51, - 0x42, 0x54, 0x56, 0x56, 0x50, 0x4a, 0x3f, 0x4a, 0x40, 0x25, 0x3a, 0x59, - 0x46, 0x58, 0x52, 0x46, 0x41, 0x28, 0x3d, 0x3e, 0x45, 0x13, 0x47, 0x41, - 0x3d, 0x44, 0x48, 0x45, 0x49, 0x26, 0x46, 0x4c, 0x3b, 0x4a, 0x42, 0x47, - 0x46, 0x41, 0x44, 0x52, 0x50, 0x4a, 0x4f, 0x40, 0x4b, 0x39, 0x42, 0x45, - 0x4a, 0x4d, 0x4f, 0x3f, 0x42, 0x4f, 0x49, 0x45, 0x42, 0x4a, 0x46, 0x47, - 0x48, 0x40, 0x4a, 0x46, 0x41, 0x3b, 0x48, 0x55, 0x4b, 0x4e, 0x4e, 0x48, - 0x4b, 0x44, 0x46, 0x53, 0x48, 0x45, 0x4b, 0x53, 0x49, 0x43, 0x4a, 0x5c, - 0x46, 0x45, 0x45, 0x49, 0x49, 0x49, 0x4c, 0x43, 0x4e, 0x4a, 0x41, 0x4a, - 0x42, 0x43, 0x4a, 0x38, 0x44, 0x4a, 0x4b, 0x3f, 0x45, 0x49, 0x45, 0x38, - 0x43, 0x40, 0x45, 0x4c, 0x47, 0x42, 0x3f, 0x42, 0x3e, 0x4a, 0x43, 0x50, - 0x4a, 0x4e, 0x4f, 0x47, 0x4d, 0x49, 0x49, 0x47, 0x4a, 0x4d, 0x46, 0x4c, - 0x4f, 0x3d, 0x52, 0x4a, 0x41, 0x44, 0x4b, 0x50, 0x4c, 0x52, 0x49, 0x50, - 0x4b, 0x45, 0x49, 0x4d, 0x48, 0x55, 0x50, 0x47, 0x4e, 0x50, 0x4f, 0x48, - 0x46, 0x4d, 0x4d, 0x41, 0x48, 0x51, 0x4b, 0x4c, 0x47, 0x51, 0x42, 0x42, - 0x4d, 
0x47, 0x43, 0x4c, 0x4c, 0x5a, 0x4e, 0x47, 0x3b, 0x59, 0x51, 0x57, - 0x4c, 0x40, 0x46, 0x4c, 0x37, 0x2a, 0x35, 0x58, 0x44, 0x5b, 0x4c, 0x44, - 0x3e, 0x2e, 0x3f, 0x43, 0x46, 0x23, 0x49, 0x3e, 0x41, 0x3f, 0x4b, 0x3e, - 0x4e, 0x2f, 0x4d, 0x4a, 0x4e, 0x40, 0x4e, 0x41, 0x40, 0x3f, 0x4a, 0x42, - 0x4d, 0x4c, 0x44, 0x47, 0x4e, 0x44, 0x40, 0x43, 0x4d, 0x49, 0x4f, 0x3d, - 0x49, 0x3f, 0x51, 0x48, 0x42, 0x4a, 0x49, 0x47, 0x49, 0x46, 0x4a, 0x45, - 0x45, 0x49, 0x53, 0x4d, 0x4c, 0x4e, 0x44, 0x50, 0x4b, 0x43, 0x4e, 0x5f, - 0x3c, 0x40, 0x44, 0x46, 0x48, 0x4b, 0x42, 0x62, 0x4e, 0x50, 0x4c, 0x49, - 0x4a, 0x4f, 0x44, 0x53, 0x42, 0x43, 0x49, 0x48, 0x4b, 0x3c, 0x4a, 0x37, - 0x4c, 0x41, 0x49, 0x46, 0x46, 0x47, 0x43, 0x40, 0x4d, 0x4d, 0x4a, 0x48, - 0x50, 0x4b, 0x50, 0x41, 0x44, 0x3e, 0x51, 0x47, 0x44, 0x4a, 0x44, 0x45, - 0x48, 0x4d, 0x52, 0x4e, 0x44, 0x48, 0x4d, 0x43, 0x42, 0x45, 0x48, 0x52, - 0x44, 0x42, 0x50, 0x42, 0x4d, 0x45, 0x48, 0x4d, 0x4f, 0x4e, 0x45, 0x49, - 0x51, 0x48, 0x4f, 0x53, 0x4d, 0x4c, 0x48, 0x50, 0x4e, 0x4d, 0x50, 0x48, - 0x49, 0x42, 0x4c, 0x42, 0x4b, 0x4b, 0x49, 0x48, 0x48, 0x49, 0x4a, 0x54, - 0x44, 0x57, 0x4d, 0x4b, 0x3f, 0x56, 0x53, 0x5c, 0x50, 0x4e, 0x46, 0x49, - 0x40, 0x24, 0x44, 0x58, 0x49, 0x54, 0x48, 0x49, 0x41, 0x22, 0x44, 0x3f, - 0x48, 0x1c, 0x4d, 0x39, 0x3e, 0x4c, 0x3d, 0x4a, 0x48, 0x2d, 0x48, 0x3e, - 0x3f, 0x3a, 0x46, 0x4e, 0x44, 0x43, 0x49, 0x51, 0x4d, 0x3c, 0x44, 0x41, - 0x4e, 0x44, 0x42, 0x4c, 0x45, 0x48, 0x45, 0x46, 0x42, 0x46, 0x47, 0x42, - 0x4f, 0x45, 0x47, 0x44, 0x48, 0x47, 0x4a, 0x42, 0x4d, 0x48, 0x3e, 0x53, - 0x47, 0x4b, 0x44, 0x4b, 0x45, 0x4a, 0x50, 0x55, 0x4c, 0x45, 0x48, 0x43, - 0x53, 0x3d, 0x4e, 0x5f, 0x42, 0x44, 0x4a, 0x4f, 0x3f, 0x48, 0x4e, 0x4b, - 0x43, 0x48, 0x43, 0x41, 0x4a, 0x4b, 0x51, 0x39, 0x52, 0x46, 0x44, 0x49, - 0x48, 0x45, 0x4c, 0x40, 0x45, 0x49, 0x51, 0x48, 0x45, 0x42, 0x45, 0x48, - 0x40, 0x43, 0x3d, 0x47, 0x53, 0x54, 0x4d, 0x4a, 0x4a, 0x47, 0x48, 0x43, - 0x4c, 0x46, 0x43, 0x4f, 0x49, 0x4c, 0x3f, 0x3d, 0x4b, 0x41, 0x40, 0x48, - 0x4e, 
0x4c, 0x4b, 0x40, 0x4c, 0x43, 0x49, 0x4d, 0x47, 0x4f, 0x47, 0x42, - 0x47, 0x4a, 0x4d, 0x4f, 0x46, 0x4d, 0x51, 0x49, 0x48, 0x4d, 0x4e, 0x46, - 0x47, 0x41, 0x44, 0x4d, 0x4b, 0x55, 0x4b, 0x4c, 0x41, 0x5e, 0x50, 0x45, - 0x40, 0x55, 0x4b, 0x60, 0x55, 0x47, 0x3d, 0x4a, 0x42, 0x22, 0x46, 0x5a, - 0x47, 0x53, 0x49, 0x44, 0x44, 0x27, 0x41, 0x4f, 0x3e, 0x22, 0x4a, 0x44, - 0x49, 0x3e, 0x4e, 0x4d, 0x3f, 0x3a, 0x4c, 0x44, 0x4a, 0x44, 0x46, 0x51, - 0x4f, 0x42, 0x4c, 0x4e, 0x39, 0x4b, 0x42, 0x39, 0x4b, 0x3e, 0x4f, 0x47, - 0x4a, 0x4f, 0x3f, 0x4d, 0x43, 0x4c, 0x4a, 0x4b, 0x4b, 0x3d, 0x51, 0x46, - 0x49, 0x4c, 0x47, 0x44, 0x43, 0x3d, 0x3c, 0x54, 0x4a, 0x47, 0x4d, 0x50, - 0x4a, 0x46, 0x51, 0x62, 0x46, 0x4d, 0x4b, 0x46, 0x49, 0x3c, 0x50, 0x57, - 0x47, 0x40, 0x3e, 0x4c, 0x4b, 0x3f, 0x55, 0x46, 0x3d, 0x45, 0x42, 0x4e, - 0x50, 0x49, 0x46, 0x3a, 0x4c, 0x47, 0x4a, 0x49, 0x42, 0x42, 0x4a, 0x44, - 0x42, 0x40, 0x49, 0x54, 0x46, 0x4b, 0x47, 0x45, 0x51, 0x47, 0x41, 0x42, - 0x49, 0x50, 0x4e, 0x48, 0x4b, 0x4b, 0x47, 0x4a, 0x47, 0x49, 0x4b, 0x45, - 0x4b, 0x54, 0x48, 0x54, 0x4b, 0x49, 0x51, 0x4a, 0x4a, 0x40, 0x46, 0x42, - 0x44, 0x44, 0x4d, 0x4b, 0x47, 0x43, 0x45, 0x41, 0x3e, 0x49, 0x43, 0x51, - 0x3e, 0x4b, 0x52, 0x46, 0x48, 0x3f, 0x4e, 0x51, 0x51, 0x49, 0x3f, 0x48, - 0x4c, 0x4c, 0x52, 0x47, 0x43, 0x57, 0x44, 0x42, 0x40, 0x52, 0x50, 0x5d, - 0x4f, 0x40, 0x42, 0x45, 0x46, 0x26, 0x3c, 0x51, 0x4b, 0x4e, 0x4b, 0x49, - 0x46, 0x35, 0x49, 0x53, 0x49, 0x2b, 0x4d, 0x3e, 0x50, 0x44, 0x4f, 0x54, - 0x46, 0x34, 0x49, 0x4d, 0x42, 0x45, 0x44, 0x4b, 0x52, 0x44, 0x52, 0x41, - 0x4d, 0x4c, 0x52, 0x41, 0x49, 0x3a, 0x4e, 0x49, 0x40, 0x4b, 0x45, 0x4d, - 0x4b, 0x4a, 0x47, 0x49, 0x45, 0x49, 0x4d, 0x50, 0x3e, 0x47, 0x44, 0x51, - 0x4c, 0x41, 0x45, 0x50, 0x47, 0x41, 0x4a, 0x52, 0x4b, 0x3d, 0x4b, 0x5b, - 0x4c, 0x4c, 0x4d, 0x3f, 0x47, 0x44, 0x49, 0x5d, 0x4a, 0x53, 0x44, 0x45, - 0x45, 0x46, 0x3d, 0x4f, 0x50, 0x3b, 0x44, 0x4e, 0x40, 0x41, 0x4c, 0x3a, - 0x4a, 0x45, 0x49, 0x48, 0x45, 0x4a, 0x45, 0x36, 0x45, 0x4d, 0x4c, 0x49, - 0x3f, 
0x47, 0x4d, 0x40, 0x53, 0x48, 0x49, 0x4c, 0x47, 0x4f, 0x42, 0x44, - 0x45, 0x40, 0x4a, 0x4c, 0x49, 0x4f, 0x4b, 0x4d, 0x42, 0x45, 0x3e, 0x4a, - 0x48, 0x4a, 0x49, 0x50, 0x4c, 0x53, 0x50, 0x45, 0x4b, 0x4c, 0x46, 0x4f, - 0x44, 0x43, 0x54, 0x50, 0x3f, 0x48, 0x42, 0x4b, 0x43, 0x3f, 0x4d, 0x4c, - 0x43, 0x49, 0x4a, 0x47, 0x54, 0x4b, 0x4f, 0x4d, 0x44, 0x47, 0x49, 0x4e, - 0x4e, 0x55, 0x40, 0x46, 0x44, 0x56, 0x4e, 0x65, 0x4f, 0x3f, 0x43, 0x48, - 0x39, 0x27, 0x43, 0x55, 0x4b, 0x4c, 0x44, 0x46, 0x42, 0x34, 0x44, 0x52, - 0x43, 0x22, 0x4e, 0x41, 0x49, 0x48, 0x49, 0x51, 0x3b, 0x37, 0x4b, 0x40, - 0x4f, 0x45, 0x53, 0x4c, 0x47, 0x46, 0x47, 0x4c, 0x3e, 0x44, 0x45, 0x49, - 0x48, 0x50, 0x45, 0x40, 0x46, 0x4c, 0x47, 0x4d, 0x44, 0x48, 0x49, 0x50, - 0x4f, 0x4a, 0x46, 0x55, 0x4e, 0x42, 0x4c, 0x4c, 0x50, 0x48, 0x3d, 0x55, - 0x46, 0x3e, 0x4a, 0x4b, 0x4f, 0x46, 0x46, 0x60, 0x50, 0x3f, 0x55, 0x40, - 0x42, 0x44, 0x48, 0x63, 0x50, 0x3d, 0x45, 0x4f, 0x4e, 0x41, 0x47, 0x48, - 0x4a, 0x3c, 0x3d, 0x46, 0x3f, 0x42, 0x43, 0x37, 0x4f, 0x4f, 0x50, 0x47, - 0x47, 0x4b, 0x52, 0x40, 0x3f, 0x44, 0x4a, 0x40, 0x4d, 0x44, 0x4e, 0x37, - 0x43, 0x48, 0x47, 0x3f, 0x51, 0x4d, 0x45, 0x42, 0x41, 0x46, 0x3d, 0x53, - 0x4f, 0x4b, 0x54, 0x45, 0x51, 0x40, 0x4a, 0x4a, 0x48, 0x4f, 0x43, 0x4a, - 0x4f, 0x4c, 0x4c, 0x4f, 0x48, 0x4c, 0x44, 0x4e, 0x43, 0x46, 0x4f, 0x4a, - 0x43, 0x41, 0x49, 0x49, 0x47, 0x53, 0x45, 0x49, 0x4e, 0x46, 0x4c, 0x4e, - 0x3c, 0x49, 0x44, 0x45, 0x4c, 0x42, 0x49, 0x41, 0x48, 0x58, 0x54, 0x4d, - 0x35, 0x52, 0x4e, 0x5b, 0x4f, 0x40, 0x3e, 0x46, 0x46, 0x36, 0x3d, 0x60, - 0x4d, 0x49, 0x4a, 0x43, 0x44, 0x36, 0x49, 0x67, 0x4a, 0x2d, 0x4b, 0x40, - 0x3f, 0x49, 0x43, 0x5f, 0x45, 0x3c, 0x49, 0x4c, 0x4a, 0x43, 0x48, 0x55, - 0x49, 0x46, 0x49, 0x46, 0x44, 0x4e, 0x42, 0x4e, 0x40, 0x45, 0x42, 0x52, - 0x4a, 0x40, 0x4a, 0x44, 0x40, 0x45, 0x54, 0x3d, 0x4c, 0x3e, 0x4c, 0x55, - 0x4d, 0x45, 0x4d, 0x51, 0x4a, 0x4b, 0x44, 0x5b, 0x48, 0x3d, 0x3e, 0x46, - 0x4f, 0x4d, 0x3f, 0x62, 0x4d, 0x45, 0x3f, 0x47, 0x47, 0x47, 0x44, 0x5b, - 0x4b, 
0x4f, 0x51, 0x4c, 0x4a, 0x47, 0x48, 0x5b, 0x47, 0x40, 0x4a, 0x47, - 0x42, 0x44, 0x46, 0x46, 0x45, 0x48, 0x4a, 0x3f, 0x40, 0x4f, 0x48, 0x3a, - 0x49, 0x52, 0x4a, 0x53, 0x43, 0x4c, 0x4b, 0x4a, 0x4a, 0x4a, 0x4e, 0x42, - 0x4b, 0x46, 0x3d, 0x50, 0x51, 0x4b, 0x4b, 0x4f, 0x50, 0x4c, 0x4f, 0x4c, - 0x4d, 0x41, 0x41, 0x3c, 0x40, 0x43, 0x54, 0x51, 0x48, 0x3d, 0x48, 0x51, - 0x42, 0x42, 0x4c, 0x4e, 0x4d, 0x4b, 0x49, 0x43, 0x48, 0x47, 0x4b, 0x49, - 0x49, 0x4e, 0x4d, 0x46, 0x4c, 0x52, 0x49, 0x49, 0x51, 0x4e, 0x45, 0x47, - 0x44, 0x47, 0x42, 0x4a, 0x46, 0x59, 0x48, 0x48, 0x4b, 0x4f, 0x4c, 0x5e, - 0x5c, 0x45, 0x3f, 0x48, 0x3d, 0x3f, 0x37, 0x5a, 0x4b, 0x4b, 0x45, 0x49, - 0x3e, 0x42, 0x41, 0x6b, 0x49, 0x2d, 0x45, 0x43, 0x47, 0x45, 0x49, 0x61, - 0x3d, 0x3b, 0x49, 0x43, 0x49, 0x4b, 0x4b, 0x55, 0x4b, 0x47, 0x46, 0x46, - 0x48, 0x4d, 0x49, 0x4f, 0x4a, 0x4c, 0x42, 0x51, 0x41, 0x44, 0x45, 0x4f, - 0x4e, 0x44, 0x3f, 0x55, 0x3e, 0x4a, 0x45, 0x50, 0x46, 0x42, 0x41, 0x49, - 0x49, 0x47, 0x49, 0x61, 0x47, 0x40, 0x41, 0x4e, 0x4d, 0x4b, 0x4a, 0x5e, - 0x52, 0x49, 0x4b, 0x52, 0x51, 0x55, 0x42, 0x61, 0x53, 0x4c, 0x48, 0x4a, - 0x4e, 0x48, 0x48, 0x57, 0x4c, 0x40, 0x40, 0x48, 0x45, 0x43, 0x3e, 0x46, - 0x43, 0x4a, 0x45, 0x45, 0x44, 0x4f, 0x44, 0x40, 0x49, 0x48, 0x4e, 0x49, - 0x4a, 0x4e, 0x49, 0x51, 0x46, 0x4f, 0x47, 0x44, 0x42, 0x4d, 0x43, 0x4e, - 0x4f, 0x4d, 0x44, 0x51, 0x47, 0x49, 0x40, 0x57, 0x4b, 0x49, 0x47, 0x4c, - 0x4d, 0x4d, 0x3e, 0x47, 0x45, 0x41, 0x50, 0x4b, 0x4b, 0x45, 0x42, 0x4e, - 0x48, 0x47, 0x4e, 0x4b, 0x56, 0x4c, 0x4f, 0x52, 0x51, 0x49, 0x4d, 0x4a, - 0x4b, 0x52, 0x4d, 0x55, 0x4b, 0x4e, 0x4e, 0x4b, 0x51, 0x57, 0x47, 0x42, - 0x49, 0x48, 0x56, 0x44, 0x52, 0x56, 0x53, 0x5a, 0x63, 0x53, 0x4c, 0x4c, - 0x43, 0x56, 0x3c, 0x57, 0x47, 0x47, 0x4d, 0x52, 0x43, 0x48, 0x45, 0x5f, - 0x45, 0x29, 0x47, 0x45, 0x48, 0x40, 0x41, 0x4b, 0x3f, 0x39, 0x49, 0x4e, - 0x47, 0x55, 0x42, 0x56, 0x4d, 0x43, 0x48, 0x44, 0x45, 0x53, 0x43, 0x46, - 0x49, 0x43, 0x49, 0x4a, 0x40, 0x4e, 0x4a, 0x4a, 0x47, 0x43, 0x45, 0x4d, - 0x4a, 
0x47, 0x3f, 0x53, 0x45, 0x43, 0x4b, 0x4c, 0x42, 0x47, 0x47, 0x5f, - 0x48, 0x48, 0x46, 0x44, 0x50, 0x47, 0x41, 0x64, 0x4e, 0x46, 0x49, 0x4a, - 0x4d, 0x55, 0x42, 0x55, 0x46, 0x3d, 0x49, 0x43, 0x52, 0x52, 0x47, 0x52, - 0x4e, 0x46, 0x47, 0x41, 0x49, 0x4d, 0x50, 0x47, 0x42, 0x49, 0x41, 0x42, - 0x4b, 0x48, 0x49, 0x42, 0x4d, 0x48, 0x51, 0x54, 0x43, 0x56, 0x4c, 0x52, - 0x53, 0x4d, 0x54, 0x4a, 0x51, 0x50, 0x48, 0x4c, 0x4e, 0x48, 0x4c, 0x4c, - 0x52, 0x49, 0x4a, 0x4e, 0x4e, 0x41, 0x4f, 0x53, 0x49, 0x52, 0x42, 0x4b, - 0x50, 0x46, 0x50, 0x4a, 0x53, 0x56, 0x46, 0x4f, 0x4b, 0x49, 0x3d, 0x41, - 0x4c, 0x52, 0x42, 0x50, 0x4d, 0x45, 0x4e, 0x51, 0x4b, 0x4c, 0x46, 0x42, - 0x41, 0x4b, 0x40, 0x4a, 0x42, 0x57, 0x4f, 0x43, 0x40, 0x50, 0x4c, 0x51, - 0x4f, 0x48, 0x3a, 0x4e, 0x51, 0x40, 0x49, 0x66, 0x4b, 0x42, 0x48, 0x3c, - 0x5b, 0x47, 0x53, 0x40, 0x4a, 0x48, 0x35, 0x44, 0x5f, 0x50, 0x4a, 0x3c, - 0x41, 0x45, 0x48, 0x3b, 0x42, 0x59, 0x43, 0x4b, 0x48, 0x49, 0x4a, 0x40, - 0x4f, 0x5c, 0x50, 0x54, 0x53, 0x55, 0x4c, 0x4a, 0x43, 0x46, 0x49, 0x47, - 0x49, 0x48, 0x4b, 0x43, 0x42, 0x44, 0x42, 0x46, 0x44, 0x3f, 0x4b, 0x42, - 0x4d, 0x49, 0x41, 0x46, 0x47, 0x51, 0x51, 0x44, 0x4c, 0x54, 0x4e, 0x4b, - 0x42, 0x52, 0x4e, 0x4c, 0x4b, 0x4a, 0x50, 0x4e, 0x44, 0x4b, 0x4e, 0x4e, - 0x4f, 0x42, 0x4b, 0x48, 0x46, 0x43, 0x48, 0x54, 0x4b, 0x4e, 0x48, 0x4f, - 0x4a, 0x4d, 0x43, 0x4e, 0x47, 0x50, 0x4a, 0x44, 0x47, 0x52, 0x46, 0x53, - 0x4a, 0x40, 0x46, 0x54, 0x50, 0x4a, 0x47, 0x51, 0x49, 0x45, 0x4b, 0x4e, - 0x4b, 0x46, 0x4c, 0x4c, 0x52, 0x47, 0x45, 0x45, 0x4a, 0x47, 0x4c, 0x52, - 0x44, 0x51, 0x47, 0x42, 0x47, 0x43, 0x43, 0x49, 0x52, 0x5a, 0x55, 0x3e, - 0x45, 0x4b, 0x4c, 0x46, 0x4f, 0x4b, 0x45, 0x49, 0x4a, 0x4e, 0x4a, 0x50, - 0x3e, 0x4e, 0x42, 0x4e, 0x44, 0x55, 0x3d, 0x4a, 0x4d, 0x49, 0x4d, 0x42, - 0x49, 0x4e, 0x50, 0x44, 0x4b, 0x3c, 0x41, 0x49, 0x51, 0x49, 0x3c, 0x4e, - 0x4c, 0x39, 0x4c, 0x72, 0x44, 0x4b, 0x49, 0x42, 0x5f, 0x48, 0x4a, 0x48, - 0x41, 0x4c, 0x43, 0x40, 0x62, 0x5e, 0x47, 0x3c, 0x4a, 0x4c, 0x55, 0x49, - 0x4b, 
0x52, 0x4e, 0x4b, 0x4d, 0x48, 0x4c, 0x3c, 0x3f, 0x4f, 0x4e, 0x48, - 0x45, 0x55, 0x4a, 0x46, 0x48, 0x3d, 0x45, 0x44, 0x4b, 0x4a, 0x46, 0x3a, - 0x4e, 0x44, 0x4d, 0x49, 0x49, 0x49, 0x40, 0x3e, 0x40, 0x47, 0x48, 0x43, - 0x3f, 0x51, 0x46, 0x4c, 0x45, 0x4c, 0x49, 0x44, 0x3e, 0x57, 0x49, 0x4e, - 0x48, 0x3f, 0x48, 0x47, 0x53, 0x4d, 0x50, 0x51, 0x49, 0x42, 0x45, 0x44, - 0x49, 0x49, 0x46, 0x4b, 0x45, 0x49, 0x4f, 0x49, 0x46, 0x48, 0x4c, 0x55, - 0x46, 0x51, 0x48, 0x4a, 0x48, 0x54, 0x4b, 0x5a, 0x4c, 0x47, 0x40, 0x47, - 0x40, 0x55, 0x50, 0x52, 0x4a, 0x4b, 0x4f, 0x49, 0x4b, 0x50, 0x4b, 0x5b, - 0x51, 0x53, 0x4f, 0x4e, 0x49, 0x48, 0x44, 0x52, 0x46, 0x4e, 0x47, 0x48, - 0x44, 0x43, 0x49, 0x55, 0x48, 0x58, 0x4f, 0x46, 0x45, 0x53, 0x45, 0x4a, - 0x4c, 0x4c, 0x49, 0x46, 0x47, 0x4d, 0x41, 0x4d, 0x4f, 0x59, 0x4a, 0x49, - 0x46, 0x4e, 0x44, 0x49, 0x4d, 0x48, 0x54, 0x47, 0x48, 0x4e, 0x48, 0x43, - 0x46, 0x41, 0x46, 0x44, 0x52, 0x46, 0x42, 0x4c, 0x4c, 0x31, 0x4d, 0x6f, - 0x51, 0x4f, 0x4d, 0x43, 0x5c, 0x48, 0x49, 0x49, 0x46, 0x4c, 0x43, 0x3b, - 0x5d, 0x63, 0x58, 0x46, 0x49, 0x45, 0x4e, 0x48, 0x49, 0x5d, 0x45, 0x50, - 0x56, 0x4d, 0x57, 0x37, 0x40, 0x55, 0x43, 0x4b, 0x4e, 0x46, 0x4c, 0x3b, - 0x3d, 0x4b, 0x49, 0x4b, 0x52, 0x47, 0x4d, 0x34, 0x4c, 0x4c, 0x47, 0x4e, - 0x4d, 0x4c, 0x3d, 0x3f, 0x4a, 0x49, 0x44, 0x45, 0x4a, 0x54, 0x43, 0x44, - 0x50, 0x4b, 0x4d, 0x4c, 0x4e, 0x48, 0x46, 0x51, 0x43, 0x48, 0x48, 0x48, - 0x42, 0x44, 0x4e, 0x48, 0x47, 0x45, 0x48, 0x51, 0x53, 0x4a, 0x4f, 0x58, - 0x42, 0x4d, 0x48, 0x4f, 0x4c, 0x45, 0x4a, 0x57, 0x4b, 0x43, 0x4d, 0x4b, - 0x4a, 0x4e, 0x4c, 0x5f, 0x3f, 0x4f, 0x4a, 0x42, 0x4b, 0x48, 0x4d, 0x62, - 0x4f, 0x4b, 0x50, 0x4c, 0x45, 0x49, 0x44, 0x53, 0x4a, 0x4f, 0x45, 0x56, - 0x4b, 0x44, 0x41, 0x53, 0x49, 0x48, 0x4d, 0x49, 0x47, 0x4b, 0x46, 0x4c, - 0x49, 0x4b, 0x4c, 0x54, 0x4f, 0x4b, 0x47, 0x49, 0x44, 0x4a, 0x4e, 0x53, - 0x4f, 0x49, 0x54, 0x4e, 0x4a, 0x48, 0x42, 0x54, 0x51, 0x46, 0x4b, 0x52, - 0x45, 0x48, 0x51, 0x4a, 0x40, 0x4a, 0x50, 0x45, 0x4a, 0x46, 0x49, 0x46, - 0x54, 
0x46, 0x42, 0x48, 0x50, 0x36, 0x4a, 0x6b, 0x46, 0x59, 0x51, 0x47, - 0x5f, 0x4d, 0x43, 0x4d, 0x44, 0x4d, 0x42, 0x3b, 0x65, 0x6a, 0x56, 0x48, - 0x4d, 0x4c, 0x52, 0x4a, 0x4d, 0x61, 0x52, 0x4b, 0x47, 0x4f, 0x48, 0x49, - 0x3f, 0x5b, 0x45, 0x51, 0x48, 0x48, 0x4b, 0x3c, 0x3b, 0x4c, 0x54, 0x52, - 0x4f, 0x51, 0x53, 0x31, 0x47, 0x4c, 0x45, 0x4a, 0x42, 0x4b, 0x47, 0x40, - 0x41, 0x49, 0x4c, 0x46, 0x4b, 0x53, 0x46, 0x49, 0x44, 0x4b, 0x4e, 0x4b, - 0x48, 0x51, 0x49, 0x4d, 0x4b, 0x3f, 0x42, 0x44, 0x45, 0x43, 0x46, 0x56, - 0x42, 0x4b, 0x49, 0x4e, 0x4e, 0x53, 0x42, 0x5c, 0x4b, 0x46, 0x49, 0x46, - 0x4e, 0x41, 0x42, 0x67, 0x41, 0x49, 0x4d, 0x48, 0x49, 0x4e, 0x3f, 0x61, - 0x48, 0x4a, 0x40, 0x42, 0x4c, 0x51, 0x50, 0x63, 0x49, 0x44, 0x49, 0x47, - 0x45, 0x4d, 0x49, 0x61, 0x3f, 0x48, 0x40, 0x41, 0x49, 0x49, 0x45, 0x57, - 0x45, 0x46, 0x4d, 0x46, 0x4c, 0x4a, 0x4d, 0x4b, 0x43, 0x54, 0x4b, 0x49, - 0x4c, 0x49, 0x41, 0x49, 0x4b, 0x47, 0x45, 0x4b, 0x44, 0x43, 0x46, 0x3f, - 0x47, 0x47, 0x43, 0x4c, 0x49, 0x4c, 0x3d, 0x4d, 0x4b, 0x54, 0x4a, 0x4f, - 0x44, 0x4c, 0x4b, 0x47, 0x4c, 0x45, 0x3d, 0x52, 0x58, 0x4b, 0x45, 0x4e, - 0x48, 0x39, 0x53, 0x70, 0x4a, 0x5d, 0x4c, 0x4e, 0x5a, 0x4f, 0x46, 0x4b, - 0x3e, 0x4f, 0x44, 0x3d, 0x66, 0x6b, 0x50, 0x4d, 0x4d, 0x57, 0x52, 0x4a, - 0x4c, 0x5b, 0x4e, 0x53, 0x4d, 0x54, 0x50, 0x42, 0x3c, 0x5d, 0x4a, 0x4c, - 0x56, 0x52, 0x50, 0x40, 0x48, 0x4c, 0x4d, 0x49, 0x49, 0x4f, 0x51, 0x38, - 0x42, 0x49, 0x4d, 0x4f, 0x45, 0x40, 0x4d, 0x41, 0x4b, 0x4a, 0x47, 0x51, - 0x4b, 0x53, 0x4c, 0x4a, 0x51, 0x4c, 0x42, 0x56, 0x48, 0x4a, 0x47, 0x58, - 0x49, 0x46, 0x52, 0x4a, 0x45, 0x47, 0x51, 0x54, 0x4f, 0x50, 0x50, 0x53, - 0x49, 0x4a, 0x4d, 0x56, 0x56, 0x4b, 0x4d, 0x45, 0x40, 0x4d, 0x48, 0x60, - 0x4e, 0x56, 0x48, 0x4b, 0x47, 0x45, 0x47, 0x62, 0x4e, 0x4f, 0x41, 0x49, - 0x48, 0x57, 0x44, 0x64, 0x4f, 0x4f, 0x49, 0x44, 0x49, 0x4c, 0x3f, 0x53, - 0x40, 0x41, 0x4e, 0x4b, 0x4d, 0x54, 0x42, 0x53, 0x4e, 0x41, 0x49, 0x44, - 0x41, 0x45, 0x4d, 0x4f, 0x47, 0x51, 0x45, 0x4a, 0x42, 0x45, 0x4e, 0x40, - 0x4b, 
0x52, 0x48, 0x47, 0x4e, 0x4f, 0x47, 0x41, 0x48, 0x53, 0x47, 0x47, - 0x46, 0x42, 0x48, 0x4b, 0x42, 0x4c, 0x49, 0x4c, 0x45, 0x4c, 0x54, 0x45, - 0x4c, 0x43, 0x4e, 0x49, 0x56, 0x47, 0x45, 0x4f, 0x4d, 0x3a, 0x58, 0x74, - 0x49, 0x5b, 0x4c, 0x4f, 0x64, 0x4e, 0x45, 0x43, 0x44, 0x5b, 0x43, 0x41, - 0x63, 0x70, 0x55, 0x45, 0x4a, 0x4a, 0x4d, 0x51, 0x4b, 0x5a, 0x51, 0x57, - 0x54, 0x5b, 0x55, 0x44, 0x38, 0x57, 0x4e, 0x50, 0x4e, 0x56, 0x57, 0x3a, - 0x3a, 0x4b, 0x57, 0x4c, 0x51, 0x53, 0x4d, 0x3b, 0x44, 0x43, 0x47, 0x4c, - 0x48, 0x59, 0x51, 0x41, 0x43, 0x44, 0x51, 0x51, 0x4a, 0x54, 0x51, 0x4b, - 0x4e, 0x45, 0x51, 0x4a, 0x49, 0x4a, 0x4f, 0x52, 0x4c, 0x3e, 0x4e, 0x55, - 0x42, 0x46, 0x46, 0x4a, 0x42, 0x52, 0x49, 0x47, 0x4a, 0x56, 0x4f, 0x50, - 0x46, 0x4f, 0x43, 0x51, 0x53, 0x46, 0x40, 0x60, 0x44, 0x4d, 0x46, 0x54, - 0x3d, 0x49, 0x43, 0x64, 0x45, 0x4d, 0x50, 0x49, 0x4f, 0x4d, 0x53, 0x60, - 0x4a, 0x52, 0x49, 0x47, 0x48, 0x5a, 0x48, 0x58, 0x4e, 0x4f, 0x43, 0x4f, - 0x50, 0x51, 0x41, 0x52, 0x4c, 0x4d, 0x45, 0x42, 0x41, 0x4c, 0x44, 0x54, - 0x4e, 0x4d, 0x4a, 0x47, 0x40, 0x4a, 0x3e, 0x47, 0x4c, 0x58, 0x46, 0x46, - 0x55, 0x4c, 0x4d, 0x45, 0x49, 0x51, 0x53, 0x46, 0x46, 0x43, 0x43, 0x48, - 0x52, 0x3d, 0x4b, 0x4e, 0x49, 0x47, 0x3f, 0x3d, 0x4f, 0x45, 0x44, 0x3f, - 0x5a, 0x43, 0x4b, 0x4d, 0x51, 0x35, 0x54, 0x76, 0x4f, 0x5e, 0x4c, 0x50, - 0x5a, 0x51, 0x46, 0x49, 0x44, 0x61, 0x4f, 0x41, 0x67, 0x72, 0x56, 0x4f, - 0x42, 0x48, 0x4b, 0x52, 0x46, 0x60, 0x50, 0x4e, 0x4a, 0x5b, 0x5f, 0x46, - 0x31, 0x5b, 0x4a, 0x48, 0x4b, 0x58, 0x51, 0x41, 0x37, 0x4e, 0x4f, 0x55, - 0x51, 0x5c, 0x4f, 0x42, 0x4b, 0x4e, 0x4f, 0x54, 0x4f, 0x52, 0x43, 0x43, - 0x48, 0x53, 0x53, 0x41, 0x4b, 0x49, 0x4e, 0x50, 0x46, 0x4c, 0x4f, 0x49, - 0x42, 0x49, 0x4c, 0x4c, 0x4c, 0x41, 0x4e, 0x48, 0x47, 0x4c, 0x49, 0x53, - 0x44, 0x46, 0x51, 0x53, 0x45, 0x52, 0x4e, 0x53, 0x50, 0x58, 0x42, 0x45, - 0x44, 0x42, 0x48, 0x58, 0x4e, 0x4d, 0x54, 0x56, 0x4c, 0x46, 0x4a, 0x58, - 0x48, 0x4f, 0x47, 0x51, 0x47, 0x4f, 0x4f, 0x5b, 0x41, 0x4e, 0x45, 0x45, - 0x4a, 
0x50, 0x3e, 0x57, 0x48, 0x4e, 0x41, 0x4c, 0x45, 0x51, 0x46, 0x4c, - 0x46, 0x4f, 0x42, 0x45, 0x4b, 0x4c, 0x49, 0x4c, 0x44, 0x4f, 0x4e, 0x4d, - 0x48, 0x56, 0x43, 0x48, 0x42, 0x54, 0x48, 0x43, 0x3e, 0x51, 0x43, 0x47, - 0x47, 0x47, 0x49, 0x4d, 0x46, 0x4e, 0x52, 0x42, 0x48, 0x4e, 0x4c, 0x4a, - 0x4d, 0x3e, 0x43, 0x40, 0x48, 0x41, 0x47, 0x4f, 0x5e, 0x49, 0x40, 0x4c, - 0x50, 0x42, 0x56, 0x75, 0x51, 0x5e, 0x51, 0x4e, 0x62, 0x58, 0x49, 0x47, - 0x51, 0x59, 0x46, 0x46, 0x6c, 0x72, 0x55, 0x44, 0x4c, 0x4a, 0x4d, 0x59, - 0x53, 0x64, 0x4d, 0x51, 0x55, 0x5e, 0x59, 0x50, 0x30, 0x58, 0x50, 0x4c, - 0x4c, 0x60, 0x59, 0x42, 0x32, 0x53, 0x50, 0x55, 0x4d, 0x53, 0x59, 0x43, - 0x3e, 0x49, 0x4f, 0x52, 0x4d, 0x51, 0x47, 0x45, 0x4d, 0x4e, 0x53, 0x4e, - 0x54, 0x4f, 0x4d, 0x4d, 0x4e, 0x40, 0x47, 0x53, 0x53, 0x49, 0x56, 0x4d, - 0x4d, 0x3a, 0x4c, 0x4e, 0x45, 0x4a, 0x47, 0x45, 0x53, 0x4a, 0x4e, 0x52, - 0x4d, 0x4e, 0x48, 0x56, 0x4e, 0x4a, 0x4d, 0x52, 0x49, 0x4e, 0x4e, 0x58, - 0x47, 0x50, 0x4c, 0x54, 0x49, 0x42, 0x46, 0x54, 0x50, 0x54, 0x54, 0x46, - 0x40, 0x49, 0x4b, 0x57, 0x4b, 0x59, 0x44, 0x46, 0x52, 0x55, 0x51, 0x55, - 0x4f, 0x50, 0x4d, 0x4d, 0x48, 0x50, 0x4e, 0x49, 0x4e, 0x42, 0x45, 0x3f, - 0x4d, 0x4f, 0x51, 0x47, 0x4a, 0x4c, 0x4b, 0x4b, 0x46, 0x4d, 0x44, 0x52, - 0x4d, 0x44, 0x40, 0x4d, 0x54, 0x46, 0x54, 0x44, 0x4b, 0x46, 0x47, 0x45, - 0x50, 0x45, 0x45, 0x4b, 0x4c, 0x48, 0x3f, 0x55, 0x4a, 0x45, 0x49, 0x4e, - 0x40, 0x49, 0x4a, 0x41, 0x56, 0x4b, 0x49, 0x4e, 0x4a, 0x41, 0x50, 0x70, - 0x56, 0x59, 0x4b, 0x55, 0x58, 0x59, 0x49, 0x47, 0x4a, 0x5a, 0x4c, 0x46, - 0x62, 0x7b, 0x58, 0x51, 0x44, 0x47, 0x44, 0x57, 0x4f, 0x65, 0x4e, 0x50, - 0x4d, 0x67, 0x5c, 0x4a, 0x2b, 0x61, 0x48, 0x4b, 0x4b, 0x5d, 0x5c, 0x48, - 0x39, 0x50, 0x45, 0x4d, 0x53, 0x60, 0x53, 0x46, 0x42, 0x46, 0x50, 0x45, - 0x4f, 0x4e, 0x46, 0x4a, 0x4d, 0x51, 0x54, 0x47, 0x59, 0x4b, 0x58, 0x4a, - 0x50, 0x3d, 0x59, 0x48, 0x45, 0x4e, 0x4e, 0x47, 0x4f, 0x47, 0x4d, 0x4b, - 0x52, 0x42, 0x4c, 0x48, 0x4a, 0x4f, 0x47, 0x43, 0x4e, 0x4c, 0x4d, 0x51, - 0x49, 
0x4f, 0x4c, 0x47, 0x47, 0x48, 0x47, 0x59, 0x4f, 0x4f, 0x53, 0x49, - 0x4e, 0x4b, 0x4f, 0x5a, 0x50, 0x42, 0x47, 0x50, 0x4a, 0x54, 0x47, 0x5a, - 0x43, 0x49, 0x47, 0x4e, 0x49, 0x4d, 0x43, 0x54, 0x4c, 0x53, 0x4e, 0x4e, - 0x42, 0x43, 0x48, 0x46, 0x4f, 0x43, 0x43, 0x45, 0x51, 0x47, 0x4b, 0x4f, - 0x56, 0x48, 0x48, 0x49, 0x46, 0x45, 0x4d, 0x52, 0x47, 0x4b, 0x46, 0x50, - 0x3e, 0x4e, 0x4c, 0x43, 0x45, 0x4d, 0x53, 0x43, 0x46, 0x45, 0x44, 0x52, - 0x45, 0x49, 0x49, 0x51, 0x3d, 0x4a, 0x4d, 0x46, 0x42, 0x41, 0x4e, 0x48, - 0x5a, 0x49, 0x49, 0x49, 0x4f, 0x3d, 0x56, 0x68, 0x56, 0x67, 0x4b, 0x57, - 0x5f, 0x5c, 0x40, 0x4a, 0x4a, 0x54, 0x4c, 0x47, 0x64, 0x7a, 0x54, 0x48, - 0x46, 0x45, 0x46, 0x57, 0x4e, 0x61, 0x4f, 0x50, 0x4d, 0x64, 0x5b, 0x43, - 0x2d, 0x60, 0x55, 0x51, 0x4c, 0x54, 0x4f, 0x4e, 0x2f, 0x50, 0x4f, 0x52, - 0x50, 0x61, 0x54, 0x4b, 0x3d, 0x4c, 0x47, 0x51, 0x4a, 0x54, 0x4b, 0x42, - 0x3b, 0x55, 0x47, 0x50, 0x4f, 0x49, 0x4a, 0x46, 0x43, 0x44, 0x45, 0x47, - 0x46, 0x4b, 0x4f, 0x46, 0x43, 0x47, 0x4a, 0x4e, 0x51, 0x43, 0x55, 0x47, - 0x4d, 0x46, 0x4c, 0x4c, 0x49, 0x4d, 0x43, 0x51, 0x47, 0x51, 0x52, 0x4a, - 0x46, 0x4f, 0x49, 0x52, 0x50, 0x4a, 0x43, 0x53, 0x46, 0x4e, 0x50, 0x54, - 0x45, 0x3a, 0x4a, 0x4a, 0x4c, 0x50, 0x4b, 0x54, 0x43, 0x4f, 0x4e, 0x45, - 0x49, 0x4f, 0x46, 0x53, 0x4d, 0x51, 0x52, 0x53, 0x3d, 0x4a, 0x47, 0x4e, - 0x43, 0x4a, 0x53, 0x48, 0x4a, 0x4c, 0x4a, 0x4a, 0x42, 0x53, 0x3e, 0x43, - 0x4f, 0x4c, 0x47, 0x48, 0x54, 0x4d, 0x48, 0x48, 0x4e, 0x4c, 0x43, 0x51, - 0x42, 0x49, 0x44, 0x3e, 0x49, 0x51, 0x4a, 0x4d, 0x4f, 0x49, 0x45, 0x44, - 0x4e, 0x41, 0x48, 0x4b, 0x4c, 0x49, 0x46, 0x47, 0x5d, 0x4c, 0x4d, 0x50, - 0x45, 0x40, 0x4e, 0x6a, 0x4f, 0x62, 0x53, 0x50, 0x5c, 0x5e, 0x4a, 0x4c, - 0x50, 0x56, 0x52, 0x42, 0x60, 0x7e, 0x5b, 0x4b, 0x43, 0x41, 0x4c, 0x56, - 0x46, 0x5f, 0x4d, 0x49, 0x43, 0x65, 0x5c, 0x4d, 0x2c, 0x61, 0x48, 0x4c, - 0x44, 0x55, 0x5c, 0x49, 0x37, 0x54, 0x4e, 0x57, 0x52, 0x5c, 0x50, 0x49, - 0x3e, 0x4d, 0x4f, 0x4f, 0x51, 0x4c, 0x48, 0x43, 0x4a, 0x5a, 0x4d, 0x4b, - 0x4e, 
0x58, 0x54, 0x49, 0x51, 0x42, 0x49, 0x4f, 0x46, 0x45, 0x52, 0x3d, - 0x4b, 0x4b, 0x43, 0x54, 0x47, 0x47, 0x4c, 0x42, 0x4b, 0x49, 0x45, 0x46, - 0x46, 0x4a, 0x51, 0x47, 0x47, 0x4f, 0x48, 0x4a, 0x3f, 0x4c, 0x4b, 0x57, - 0x4a, 0x3f, 0x52, 0x4a, 0x56, 0x52, 0x4b, 0x54, 0x4c, 0x3e, 0x3f, 0x4f, - 0x4b, 0x50, 0x4c, 0x53, 0x4a, 0x49, 0x46, 0x4e, 0x50, 0x48, 0x4f, 0x4b, - 0x4a, 0x4e, 0x3e, 0x49, 0x45, 0x42, 0x42, 0x41, 0x47, 0x4b, 0x4f, 0x42, - 0x49, 0x4c, 0x55, 0x4c, 0x4e, 0x42, 0x47, 0x42, 0x4b, 0x48, 0x46, 0x41, - 0x46, 0x4e, 0x4d, 0x3f, 0x4f, 0x46, 0x4f, 0x4b, 0x4b, 0x4d, 0x50, 0x3e, - 0x42, 0x43, 0x44, 0x4a, 0x49, 0x40, 0x4e, 0x43, 0x3e, 0x52, 0x3e, 0x44, - 0x49, 0x43, 0x4d, 0x44, 0x62, 0x51, 0x42, 0x53, 0x51, 0x40, 0x4c, 0x64, - 0x4f, 0x63, 0x4e, 0x5c, 0x5b, 0x5c, 0x48, 0x4d, 0x4a, 0x57, 0x4f, 0x42, - 0x65, 0xfe, 0x5c, 0x4e, 0x47, 0x43, 0x4a, 0x58, 0x4e, 0x5e, 0x48, 0x4c, - 0x51, 0x5e, 0x60, 0x56, 0x2f, 0x62, 0x54, 0x58, 0x51, 0x52, 0x55, 0x51, - 0x36, 0x4b, 0x46, 0x51, 0x53, 0x5f, 0x46, 0x4c, 0x37, 0x4d, 0x4a, 0x45, - 0x4b, 0x3f, 0x41, 0x42, 0x3f, 0x53, 0x4a, 0x48, 0x49, 0x4a, 0x4a, 0x45, - 0x52, 0x3f, 0x52, 0x52, 0x45, 0x4d, 0x4f, 0x45, 0x46, 0x4a, 0x51, 0x48, - 0x56, 0x47, 0x50, 0x3e, 0x46, 0x49, 0x4c, 0x51, 0x49, 0x54, 0x45, 0x4f, - 0x4b, 0x4b, 0x49, 0x46, 0x4b, 0x4d, 0x49, 0x5c, 0x4d, 0x43, 0x47, 0x49, - 0x48, 0x52, 0x46, 0x50, 0x51, 0x37, 0x50, 0x52, 0x4c, 0x4d, 0x4f, 0x51, - 0x4f, 0x42, 0x50, 0x47, 0x48, 0x4e, 0x4d, 0x4c, 0x48, 0x48, 0x4a, 0x51, - 0x49, 0x42, 0x50, 0x4f, 0x43, 0x4e, 0x47, 0x4b, 0x47, 0x4a, 0x44, 0x44, - 0x4c, 0x51, 0x49, 0x44, 0x45, 0x45, 0x45, 0x48, 0x3f, 0x4a, 0x43, 0x49, - 0x46, 0x49, 0x4c, 0x4d, 0x45, 0x50, 0x44, 0x45, 0x44, 0x55, 0x4a, 0x45, - 0x48, 0x47, 0x4c, 0x43, 0x3f, 0x48, 0x42, 0x43, 0x43, 0x43, 0x48, 0x46, - 0x5c, 0x51, 0x47, 0x51, 0x48, 0x40, 0x54, 0x66, 0x4e, 0x67, 0x4d, 0x5a, - 0x60, 0x57, 0x47, 0x4d, 0x4d, 0x58, 0x53, 0x46, 0x66, 0x7e, 0x56, 0x48, - 0x44, 0x4f, 0x49, 0x5c, 0x4a, 0x63, 0x50, 0x4c, 0x49, 0x56, 0x61, 0x50, - 0x2c, 
0x68, 0x4d, 0x51, 0x46, 0x4e, 0x5b, 0x51, 0x2e, 0x53, 0x54, 0x50, - 0x46, 0x58, 0x44, 0x4f, 0x37, 0x48, 0x55, 0x50, 0x49, 0x49, 0x4e, 0x46, - 0x43, 0x56, 0x52, 0x4e, 0x50, 0x4b, 0x50, 0x4c, 0x49, 0x40, 0x4d, 0x4f, - 0x50, 0x41, 0x44, 0x39, 0x4b, 0x4d, 0x4b, 0x41, 0x51, 0x4d, 0x4c, 0x41, - 0x3f, 0x52, 0x4e, 0x4b, 0x49, 0x53, 0x45, 0x43, 0x4d, 0x4f, 0x44, 0x4d, - 0x4b, 0x53, 0x50, 0x4e, 0x45, 0x3f, 0x4e, 0x51, 0x50, 0x55, 0x4f, 0x51, - 0x4d, 0x3d, 0x58, 0x3f, 0x46, 0x50, 0x50, 0x50, 0x56, 0x42, 0x49, 0x49, - 0x50, 0x4f, 0x42, 0x4b, 0x4c, 0x45, 0x52, 0x41, 0x46, 0x43, 0x4c, 0x4a, - 0x4c, 0x51, 0x4d, 0x4d, 0x4a, 0x49, 0x54, 0x49, 0x58, 0x53, 0x49, 0x45, - 0x47, 0x4c, 0x4c, 0x44, 0x4e, 0x51, 0x4c, 0x4c, 0x47, 0x48, 0x4c, 0x4e, - 0x49, 0x54, 0x4c, 0x51, 0x49, 0x48, 0x47, 0x45, 0x42, 0x49, 0x42, 0x51, - 0x4e, 0x3f, 0x49, 0x41, 0x50, 0x3e, 0x4d, 0x50, 0x5c, 0x51, 0x4d, 0x56, - 0x47, 0x48, 0x58, 0x65, 0x51, 0x6b, 0x56, 0x5b, 0x56, 0x55, 0x46, 0x49, - 0x4b, 0x58, 0x59, 0x4a, 0x68, 0x79, 0x53, 0x46, 0x45, 0x4b, 0x53, 0x5d, - 0x4b, 0x6f, 0x4e, 0x4f, 0x4c, 0x53, 0x5b, 0x52, 0x30, 0x63, 0x46, 0x57, - 0x46, 0x50, 0x4b, 0x48, 0x2e, 0x4c, 0x46, 0x48, 0x44, 0x51, 0x46, 0x4a, - 0x35, 0x55, 0x43, 0x4c, 0x43, 0x4d, 0x4e, 0x3e, 0x47, 0x56, 0x50, 0x4d, - 0x44, 0x59, 0x4c, 0x51, 0x46, 0x42, 0x4e, 0x43, 0x4c, 0x44, 0x42, 0x3a, - 0x40, 0x48, 0x46, 0x44, 0x45, 0x4a, 0x46, 0x3a, 0x53, 0x4c, 0x4d, 0x4c, - 0x4a, 0x4f, 0x53, 0x40, 0x4b, 0x48, 0x54, 0x4b, 0x44, 0x59, 0x41, 0x50, - 0x4e, 0x50, 0x55, 0x4d, 0x55, 0x41, 0x4a, 0x4f, 0x47, 0x43, 0x4e, 0x50, - 0x52, 0x4c, 0x50, 0x4d, 0x47, 0x42, 0x4f, 0x4b, 0x47, 0x43, 0x41, 0x4a, - 0x55, 0x3e, 0x50, 0x4b, 0x41, 0x49, 0x47, 0x49, 0x53, 0x4d, 0x48, 0x4b, - 0x43, 0x43, 0x51, 0x44, 0x4d, 0x4c, 0x44, 0x50, 0x4d, 0x42, 0x49, 0x4e, - 0x50, 0x50, 0x4c, 0x49, 0x49, 0x51, 0x46, 0x43, 0x4a, 0x4e, 0x53, 0x47, - 0x43, 0x46, 0x40, 0x49, 0x47, 0x44, 0x44, 0x4d, 0x4b, 0x4b, 0x51, 0x4b, - 0x45, 0x49, 0x47, 0x43, 0x56, 0x49, 0x4c, 0x54, 0x50, 0x3c, 0x4c, 0x5e, - 0x51, 
0x67, 0x4f, 0x57, 0x57, 0x53, 0x3e, 0x4e, 0x4e, 0x5e, 0x4b, 0x48, - 0x5a, 0x78, 0x55, 0x4a, 0x3f, 0x4b, 0x4c, 0x5b, 0x53, 0x64, 0x4d, 0x53, - 0x49, 0x57, 0x57, 0x58, 0x37, 0x62, 0x4f, 0x56, 0x44, 0x4e, 0x58, 0x4a, - 0x30, 0x4f, 0x40, 0x4e, 0x47, 0x58, 0x52, 0x50, 0x35, 0x4d, 0x49, 0x52, - 0x4e, 0x42, 0x46, 0x47, 0x44, 0x57, 0x54, 0x43, 0x4e, 0x56, 0x43, 0x49, - 0x44, 0x40, 0x44, 0x41, 0x50, 0x49, 0x4b, 0x44, 0x4d, 0x52, 0x49, 0x43, - 0x52, 0x54, 0x49, 0x3f, 0x49, 0x42, 0x49, 0x4a, 0x43, 0x3e, 0x50, 0x40, - 0x46, 0x4b, 0x50, 0x4b, 0x53, 0x4b, 0x47, 0x52, 0x51, 0x4b, 0x47, 0x3f, - 0x46, 0x4b, 0x4c, 0x57, 0x49, 0x47, 0x54, 0x49, 0x50, 0x50, 0x4d, 0x4a, - 0x42, 0x4e, 0x51, 0x4c, 0x47, 0x47, 0x42, 0x43, 0x54, 0x43, 0x46, 0x47, - 0x4d, 0x43, 0x54, 0x47, 0x43, 0x58, 0x48, 0x45, 0x4b, 0x46, 0x48, 0x3d, - 0x47, 0x3f, 0x44, 0x4f, 0x4e, 0x46, 0x41, 0x40, 0x4d, 0x4d, 0x4d, 0x52, - 0x54, 0x47, 0x4f, 0x51, 0x4f, 0x45, 0x45, 0x48, 0x4b, 0x4d, 0x44, 0x52, - 0x51, 0x4b, 0x48, 0x4f, 0x49, 0x49, 0x46, 0x50, 0x54, 0x42, 0x44, 0x51, - 0x58, 0x4e, 0x43, 0x58, 0x55, 0x40, 0x53, 0x5a, 0x51, 0x61, 0x51, 0x60, - 0x53, 0x57, 0x45, 0x4f, 0x45, 0x5e, 0x51, 0x42, 0x61, 0x7a, 0x55, 0x47, - 0x41, 0x4b, 0x4a, 0x5b, 0x4c, 0x65, 0x4f, 0x55, 0x46, 0x54, 0x65, 0x59, - 0x36, 0x61, 0x54, 0x55, 0x48, 0x57, 0x52, 0x4e, 0x24, 0x4b, 0x49, 0x4d, - 0x43, 0x57, 0x44, 0x51, 0x3b, 0x4f, 0x45, 0x40, 0x47, 0x4a, 0x43, 0x47, - 0x46, 0x58, 0x50, 0x54, 0x4d, 0x50, 0x44, 0x42, 0x4a, 0x46, 0x4b, 0x4d, - 0x4f, 0x4f, 0x4d, 0x40, 0x48, 0x4a, 0x53, 0x48, 0x49, 0x48, 0x4d, 0x39, - 0x47, 0x4e, 0x44, 0x4c, 0x4b, 0x49, 0x44, 0x42, 0x4a, 0x45, 0x46, 0x46, - 0x53, 0x4d, 0x49, 0x4f, 0x4e, 0x48, 0x50, 0x4a, 0x4c, 0x46, 0x56, 0x4b, - 0x4b, 0x57, 0x4c, 0x49, 0x4a, 0x4a, 0x43, 0x4e, 0x56, 0x45, 0x50, 0x4c, - 0x47, 0x55, 0x48, 0x46, 0x4e, 0x46, 0x45, 0x3f, 0x4a, 0x4c, 0x4c, 0x47, - 0x4a, 0x51, 0x4e, 0x50, 0x40, 0x52, 0x45, 0x45, 0x4b, 0x46, 0x4f, 0x44, - 0x51, 0x4a, 0x4e, 0x4d, 0x4c, 0x46, 0x42, 0x47, 0x4a, 0x4e, 0x46, 0x42, - 0x4b, 
0x4f, 0x4b, 0x4e, 0x4e, 0x46, 0x42, 0x50, 0x53, 0x51, 0x4f, 0x54, - 0x45, 0x4f, 0x45, 0x42, 0x4c, 0x45, 0x40, 0x48, 0x59, 0x49, 0x49, 0x53, - 0x4c, 0x43, 0x4b, 0x57, 0x54, 0x64, 0x4e, 0x5f, 0x5c, 0x59, 0x4b, 0x56, - 0x49, 0x5d, 0x4f, 0x4b, 0x62, 0x73, 0x54, 0x45, 0x49, 0x50, 0x48, 0x5a, - 0x50, 0x6d, 0x4a, 0x4e, 0x48, 0x55, 0x5d, 0x57, 0x38, 0x68, 0x52, 0x5a, - 0x46, 0x56, 0x4c, 0x5a, 0x2e, 0x55, 0x49, 0x4f, 0x4a, 0x57, 0x4f, 0x54, - 0x41, 0x53, 0x46, 0x43, 0x45, 0x47, 0x53, 0x4a, 0x42, 0x4f, 0x4d, 0x48, - 0x4c, 0x49, 0x47, 0x48, 0x45, 0x49, 0x48, 0x53, 0x48, 0x52, 0x4a, 0x44, - 0x4c, 0x49, 0x52, 0x4b, 0x47, 0x51, 0x42, 0x47, 0x49, 0x51, 0x3f, 0x45, - 0x47, 0x4e, 0x53, 0x33, 0x55, 0x51, 0x55, 0x48, 0x4b, 0x51, 0x56, 0x47, - 0x43, 0x55, 0x47, 0x42, 0x47, 0x4f, 0x47, 0x51, 0x46, 0x55, 0x4a, 0x4b, - 0x50, 0x52, 0x4f, 0x43, 0x4b, 0x53, 0x4d, 0x3f, 0x4e, 0x56, 0x50, 0x49, - 0x4d, 0x47, 0x51, 0x49, 0x4a, 0x52, 0x44, 0x43, 0x4d, 0x4e, 0x41, 0x51, - 0x4c, 0x4d, 0x47, 0x48, 0x4f, 0x40, 0x50, 0x46, 0x43, 0x4d, 0x4e, 0x50, - 0x43, 0x47, 0x4e, 0x46, 0x4f, 0x4b, 0x51, 0x4b, 0x4a, 0x57, 0x42, 0x51, - 0x4c, 0x54, 0x52, 0x42, 0x4c, 0x42, 0x47, 0x54, 0x4a, 0x4a, 0x47, 0x4a, - 0x3f, 0x46, 0x4e, 0x4c, 0x53, 0x50, 0x47, 0x53, 0x49, 0x44, 0x52, 0x5a, - 0x4b, 0x65, 0x50, 0x5b, 0x57, 0x59, 0x4a, 0x48, 0x48, 0x5f, 0x55, 0x48, - 0x5c, 0x78, 0x55, 0x48, 0x4a, 0x4b, 0x49, 0x4c, 0x46, 0x6b, 0x54, 0x57, - 0x55, 0x4b, 0x59, 0x52, 0x38, 0x5b, 0x57, 0x56, 0x4b, 0x4f, 0x48, 0x4e, - 0x34, 0x5a, 0x4e, 0x4f, 0x43, 0x4e, 0x4b, 0x4e, 0x36, 0x4d, 0x52, 0x48, - 0x4d, 0x4c, 0x4c, 0x49, 0x51, 0x54, 0x45, 0x54, 0x4a, 0x4e, 0x52, 0x41, - 0x4c, 0x45, 0x4a, 0x53, 0x55, 0x4b, 0x50, 0x47, 0x4e, 0x4d, 0x43, 0x51, - 0x4e, 0x4a, 0x51, 0x46, 0x4e, 0x4d, 0x48, 0x3f, 0x43, 0x52, 0x56, 0x38, - 0x52, 0x46, 0x43, 0x49, 0x40, 0x49, 0x53, 0x41, 0x47, 0x41, 0x41, 0x42, - 0x4f, 0x4b, 0x46, 0x4b, 0x4a, 0x57, 0x4a, 0x45, 0x4b, 0x46, 0x47, 0x3c, - 0x43, 0x46, 0x4f, 0x50, 0x4c, 0x53, 0x4f, 0x41, 0x4a, 0x4a, 0x40, 0x4a, - 0x3e, 
0x4e, 0x4d, 0x41, 0x4a, 0x42, 0x49, 0x4c, 0x51, 0x46, 0x4f, 0x43, - 0x4b, 0x41, 0x50, 0x48, 0x4a, 0x40, 0x52, 0x45, 0x40, 0x40, 0x46, 0x48, - 0x48, 0x52, 0x52, 0x41, 0x43, 0x49, 0x49, 0x4c, 0x44, 0x48, 0x50, 0x4a, - 0x47, 0x48, 0x4c, 0x42, 0x49, 0x48, 0x52, 0x56, 0x4b, 0x41, 0x4e, 0x47, - 0x52, 0x56, 0x4e, 0x56, 0x4b, 0x38, 0x50, 0x55, 0x5a, 0x63, 0x51, 0x5a, - 0x54, 0x52, 0x44, 0x45, 0x47, 0x5e, 0x4c, 0x4a, 0x5e, 0x71, 0x56, 0x44, - 0x4c, 0x4b, 0x4c, 0x4e, 0x49, 0x69, 0x50, 0x53, 0x4d, 0x5c, 0x59, 0x50, - 0x36, 0x5d, 0x46, 0x5b, 0x51, 0x55, 0x55, 0x51, 0x36, 0x5a, 0x53, 0x56, - 0x54, 0x4a, 0x55, 0x53, 0x3c, 0x52, 0x4a, 0x45, 0x4c, 0x56, 0x49, 0x46, - 0x4f, 0x5b, 0x43, 0x4b, 0x49, 0x4c, 0x4b, 0x41, 0x44, 0x4b, 0x47, 0x4b, - 0x4b, 0x54, 0x4a, 0x4c, 0x49, 0x44, 0x46, 0x46, 0x48, 0x49, 0x47, 0x4a, - 0x40, 0x4e, 0x47, 0x53, 0x4a, 0x47, 0x4a, 0x3b, 0x48, 0x4b, 0x50, 0x51, - 0x50, 0x44, 0x4d, 0x49, 0x42, 0x4b, 0x43, 0x48, 0x4a, 0x43, 0x4d, 0x4d, - 0x49, 0x4d, 0x43, 0x4f, 0x50, 0x49, 0x47, 0x48, 0x48, 0x4f, 0x49, 0x41, - 0x4c, 0x46, 0x47, 0x3e, 0x51, 0x4d, 0x4e, 0x42, 0x3d, 0x53, 0x4d, 0x3b, - 0x53, 0x52, 0x4c, 0x4c, 0x43, 0x46, 0x43, 0x3d, 0x53, 0x48, 0x43, 0x4e, - 0x45, 0x52, 0x4d, 0x4a, 0x44, 0x49, 0x47, 0x4c, 0x4e, 0x4c, 0x4a, 0x4e, - 0x41, 0x48, 0x4b, 0x44, 0x4d, 0x4a, 0x4d, 0x44, 0x4a, 0x45, 0x4f, 0x52, - 0x45, 0x3f, 0x4b, 0x48, 0x43, 0x41, 0x3d, 0x53, 0x53, 0x50, 0x4a, 0x56, - 0x4d, 0x3e, 0x55, 0x4e, 0x56, 0x5e, 0x52, 0x52, 0x54, 0x50, 0x42, 0x4a, - 0x4d, 0x5f, 0x4f, 0x49, 0x5d, 0x6f, 0x55, 0x4a, 0x47, 0x49, 0x4e, 0x4a, - 0x43, 0x6e, 0x4e, 0x4f, 0x52, 0x59, 0x62, 0x4b, 0x3e, 0x5c, 0x4c, 0x4e, - 0x45, 0x52, 0x43, 0x4d, 0x3c, 0x58, 0x52, 0x49, 0x48, 0x55, 0x53, 0x4e, - 0x3d, 0x4e, 0x4c, 0x4b, 0x4b, 0x50, 0x4a, 0x47, 0x45, 0x62, 0x50, 0x49, - 0x48, 0x4b, 0x55, 0x45, 0x46, 0x51, 0x41, 0x55, 0x54, 0x55, 0x50, 0x47, - 0x46, 0x4d, 0x46, 0x4b, 0x41, 0x49, 0x4c, 0x40, 0x45, 0x4f, 0x52, 0x54, - 0x45, 0x4d, 0x53, 0x3a, 0x4c, 0x55, 0x4e, 0x48, 0x44, 0x45, 0x56, 0x3c, - 0x48, 
0x46, 0x4b, 0x51, 0x53, 0x43, 0x41, 0x49, 0x4c, 0x52, 0x48, 0x42, - 0x48, 0x3f, 0x4c, 0x38, 0x46, 0x50, 0x4a, 0x44, 0x50, 0x54, 0x4e, 0x38, - 0x48, 0x42, 0x43, 0x4a, 0x4c, 0x44, 0x47, 0x42, 0x42, 0x46, 0x4a, 0x50, - 0x47, 0x4b, 0x43, 0x40, 0x44, 0x46, 0x46, 0x4d, 0x50, 0x4a, 0x4e, 0x51, - 0x44, 0x40, 0x50, 0x43, 0x52, 0x4d, 0x42, 0x4c, 0x50, 0x41, 0x4a, 0x4e, - 0x45, 0x49, 0x4d, 0x40, 0x46, 0x51, 0x43, 0x4b, 0x48, 0x47, 0x42, 0x55, - 0x4a, 0x41, 0x4f, 0x49, 0x4f, 0x4e, 0x47, 0x4c, 0x4a, 0x48, 0x50, 0x4e, - 0x50, 0x57, 0x4e, 0x56, 0x56, 0x4e, 0x44, 0x48, 0x4a, 0x5b, 0x55, 0x49, - 0x59, 0x67, 0x54, 0x46, 0x4f, 0x41, 0x4d, 0x4e, 0x4a, 0x63, 0x4d, 0x44, - 0x53, 0x5b, 0x59, 0x4f, 0x43, 0x55, 0x56, 0x4e, 0x55, 0x4c, 0x4b, 0x54, - 0x3c, 0x56, 0x4d, 0x50, 0x4f, 0x4a, 0x5a, 0x47, 0x48, 0x56, 0x4f, 0x4f, - 0x50, 0x51, 0x48, 0x4e, 0x4d, 0x50, 0x4e, 0x45, 0x4b, 0x48, 0x4e, 0x44, - 0x46, 0x4d, 0x43, 0x46, 0x41, 0x59, 0x53, 0x4b, 0x4a, 0x3e, 0x51, 0x47, - 0x43, 0x48, 0x52, 0x3f, 0x43, 0x50, 0x4b, 0x4f, 0x41, 0x48, 0x43, 0x2e, - 0x4d, 0x4e, 0x4c, 0x45, 0x45, 0x46, 0x4b, 0x43, 0x46, 0x49, 0x46, 0x4d, - 0x47, 0x4e, 0x4d, 0x3c, 0x47, 0x4a, 0x52, 0x4e, 0x41, 0x50, 0x43, 0x3a, - 0x50, 0x47, 0x4a, 0x45, 0x52, 0x4a, 0x4c, 0x3f, 0x42, 0x3d, 0x49, 0x48, - 0x48, 0x4c, 0x42, 0x3a, 0x40, 0x47, 0x46, 0x4e, 0x44, 0x52, 0x46, 0x44, - 0x4a, 0x44, 0x43, 0x49, 0x42, 0x45, 0x3f, 0x50, 0x4c, 0x44, 0x48, 0x43, - 0x47, 0x4a, 0x48, 0x48, 0x3e, 0x45, 0x43, 0x48, 0x4a, 0x48, 0x53, 0x4b, - 0x50, 0x49, 0x43, 0x4d, 0x53, 0x4f, 0x4b, 0x4b, 0x40, 0x42, 0x50, 0x4d, - 0x53, 0x4e, 0x44, 0x4d, 0x45, 0x3d, 0x51, 0x51, 0x4f, 0x59, 0x4b, 0x51, - 0x4a, 0x4e, 0x42, 0x40, 0x49, 0x5b, 0x4b, 0x43, 0x53, 0x60, 0x47, 0x49, - 0x4a, 0x44, 0x44, 0x48, 0x4b, 0x60, 0x51, 0x3f, 0x4b, 0x5b, 0x4f, 0x4a, - 0x4a, 0x50, 0x49, 0x46, 0x55, 0x50, 0x4b, 0x4c, 0x40, 0x4e, 0x51, 0x4f, - 0x4b, 0x51, 0x54, 0x50, 0x48, 0x4e, 0x4a, 0x4f, 0x4d, 0x4e, 0x54, 0x4d, - 0x41, 0x50, 0x4e, 0x47, 0x47, 0x47, 0x54, 0x3b, 0x51, 0x54, 0x50, 0x49, - 0x48, 
0x4c, 0x4e, 0x47, 0x3f, 0x3c, 0x4c, 0x43, 0x45, 0x42, 0x45, 0x37, - 0x41, 0x52, 0x49, 0x47, 0x4e, 0x4a, 0x4b, 0x37, 0x48, 0x4d, 0x4e, 0x4a, - 0x42, 0x56, 0x3d, 0x35, 0x48, 0x42, 0x4b, 0x4a, 0x44, 0x52, 0x40, 0x48, - 0x4f, 0x49, 0x4f, 0x4c, 0x4d, 0x43, 0x49, 0x38, 0x4b, 0x42, 0x48, 0x42, - 0x45, 0x45, 0x54, 0x3a, 0x47, 0x47, 0x52, 0x45, 0x4a, 0x48, 0x47, 0x39, - 0x4d, 0x45, 0x54, 0x4b, 0x4e, 0x4f, 0x4e, 0x38, 0x4a, 0x4b, 0x48, 0x45, - 0x4e, 0x43, 0x4e, 0x4e, 0x46, 0x4e, 0x4e, 0x50, 0x46, 0x4c, 0x42, 0x45, - 0x4b, 0x46, 0x47, 0x4d, 0x49, 0x3f, 0x4f, 0x50, 0x46, 0x4a, 0x47, 0x4e, - 0x4a, 0x3e, 0x50, 0x46, 0x47, 0x40, 0x4f, 0x47, 0x51, 0x4b, 0x43, 0x46, - 0x4a, 0x42, 0x55, 0x4d, 0x46, 0x63, 0x49, 0x4e, 0x4f, 0x4f, 0x42, 0x45, - 0x50, 0x57, 0x49, 0x3e, 0x57, 0x63, 0x45, 0x4a, 0x49, 0x50, 0x41, 0x4a, - 0x48, 0x64, 0x4f, 0x42, 0x47, 0x58, 0x4b, 0x45, 0x43, 0x57, 0x49, 0x58, - 0x51, 0x51, 0x47, 0x43, 0x51, 0x4b, 0x4a, 0x45, 0x50, 0x54, 0x4d, 0x4d, - 0x3e, 0x4a, 0x50, 0x40, 0x51, 0x4f, 0x52, 0x48, 0x53, 0x49, 0x44, 0x4b, - 0x51, 0x4b, 0x50, 0x42, 0x4d, 0x49, 0x4a, 0x46, 0x44, 0x50, 0x47, 0x3f, - 0x48, 0x47, 0x41, 0x4a, 0x42, 0x52, 0x4a, 0x33, 0x50, 0x50, 0x54, 0x3f, - 0x44, 0x4e, 0x51, 0x3c, 0x4e, 0x51, 0x48, 0x4b, 0x47, 0x49, 0x3f, 0x3d, - 0x4e, 0x46, 0x4a, 0x41, 0x40, 0x50, 0x49, 0x40, 0x4a, 0x4b, 0x45, 0x50, - 0x4e, 0x4d, 0x4b, 0x39, 0x4e, 0x4b, 0x48, 0x3c, 0x47, 0x44, 0x4c, 0x42, - 0x45, 0x50, 0x3e, 0x54, 0x4d, 0x49, 0x48, 0x3c, 0x45, 0x42, 0x55, 0x4a, - 0x41, 0x4f, 0x40, 0x3f, 0x47, 0x46, 0x46, 0x44, 0x4f, 0x47, 0x46, 0x44, - 0x41, 0x40, 0x44, 0x48, 0x3e, 0x3c, 0x46, 0x3e, 0x4a, 0x45, 0x4c, 0x52, - 0x47, 0x42, 0x47, 0x3f, 0x47, 0x4e, 0x4b, 0x53, 0x4a, 0x3d, 0x4d, 0x47, - 0x4f, 0x3d, 0x4e, 0x43, 0x4f, 0x46, 0x43, 0x43, 0x46, 0x41, 0x4f, 0x42, - 0x46, 0x57, 0x4d, 0x51, 0x49, 0x51, 0x4c, 0x44, 0x51, 0x4f, 0x46, 0x44, - 0x54, 0x5d, 0x4f, 0x40, 0x59, 0x46, 0x53, 0x46, 0x48, 0x54, 0x43, 0x45, - 0x4d, 0x51, 0x4f, 0x44, 0x44, 0x53, 0x49, 0x4e, 0x48, 0x46, 0x44, 0x4a, - 0x4a, 
0x42, 0x4c, 0x46, 0x54, 0x4f, 0x52, 0x47, 0x46, 0x44, 0x4c, 0x4d, - 0x4c, 0x47, 0x4d, 0x40, 0x55, 0x58, 0x46, 0x46, 0x3f, 0x3e, 0x47, 0x36, - 0x3f, 0x4d, 0x4b, 0x4d, 0x4f, 0x4f, 0x48, 0x34, 0x4d, 0x46, 0x46, 0x50, - 0x50, 0x4b, 0x47, 0x45, 0x4e, 0x49, 0x50, 0x4f, 0x4a, 0x48, 0x4f, 0x39, - 0x53, 0x4c, 0x4b, 0x56, 0x45, 0x4f, 0x55, 0x3a, 0x40, 0x53, 0x43, 0x4b, - 0x47, 0x3d, 0x4c, 0x34, 0x4b, 0x4e, 0x4a, 0x4b, 0x4d, 0x49, 0x4e, 0x40, - 0x4d, 0x48, 0x40, 0x4a, 0x4a, 0x4b, 0x4a, 0x42, 0x4c, 0x52, 0x43, 0x42, - 0x44, 0x3f, 0x4e, 0x42, 0x44, 0x45, 0x40, 0x3d, 0x4b, 0x45, 0x4a, 0x43, - 0x4b, 0x4b, 0x4e, 0x46, 0x55, 0x43, 0x44, 0x3f, 0x44, 0x43, 0x4b, 0x4b, - 0x45, 0x51, 0x48, 0x49, 0x3d, 0x44, 0x4a, 0x4a, 0x50, 0x50, 0x47, 0x44, - 0x4f, 0x3e, 0x3f, 0x43, 0x4c, 0x46, 0x4a, 0x4e, 0x4c, 0x52, 0x48, 0x4e, - 0x48, 0x46, 0x45, 0x48, 0x41, 0x4f, 0x51, 0x48, 0x40, 0x4d, 0x4a, 0x4b, - 0x4c, 0x51, 0x49, 0x50, 0x4e, 0x4b, 0x4a, 0x42, 0x49, 0x54, 0x4e, 0x43, - 0x52, 0x47, 0x4a, 0x41, 0x42, 0x51, 0x48, 0x4a, 0x46, 0x45, 0x4a, 0x43, - 0x4e, 0x4f, 0x41, 0x49, 0x4b, 0x42, 0x40, 0x4a, 0x50, 0x41, 0x42, 0x3f, - 0x49, 0x4a, 0x40, 0x3e, 0x3f, 0x42, 0x4d, 0x51, 0x4e, 0x4e, 0x47, 0x41, - 0x4e, 0x4e, 0x49, 0x4b, 0x41, 0x45, 0x51, 0x40, 0x45, 0x4c, 0x3f, 0x42, - 0x4c, 0x45, 0x4d, 0x39, 0x46, 0x52, 0x4a, 0x4e, 0x4c, 0x49, 0x4e, 0x43, - 0x43, 0x4c, 0x48, 0x46, 0x48, 0x49, 0x50, 0x3a, 0x3f, 0x49, 0x42, 0x4f, - 0x42, 0x4d, 0x4e, 0x3f, 0x51, 0x4b, 0x4e, 0x4b, 0x51, 0x44, 0x43, 0x4a, - 0x4a, 0x4c, 0x50, 0x48, 0x45, 0x47, 0x4d, 0x41, 0x47, 0x45, 0x51, 0x41, - 0x42, 0x48, 0x4c, 0x39, 0x51, 0x45, 0x46, 0x53, 0x4b, 0x50, 0x46, 0x45, - 0x4b, 0x4d, 0x42, 0x4b, 0x3f, 0x45, 0x4b, 0x4e, 0x50, 0x50, 0x47, 0x4a, - 0x45, 0x40, 0x4b, 0x43, 0x3f, 0x4a, 0x41, 0x42, 0x51, 0x41, 0x4d, 0x42, - 0x53, 0x48, 0x48, 0x49, 0x4b, 0x40, 0x42, 0x3d, 0x4f, 0x53, 0x49, 0x46, - 0x46, 0x43, 0x42, 0x44, 0x46, 0x48, 0x3f, 0x46, 0x31, 0x43, 0x4d, 0x4b, - 0x48, 0x4d, 0x4c, 0x43, 0x45, 0x53, 0x50, 0x40, 0x4a, 0x48, 0x45, 0x3b, - 0x4f, 
0x4d, 0x53, 0x4c, 0x44, 0x54, 0x50, 0x66, 0x3f, 0x45, 0x4c, 0x4c, - 0x4a, 0x49, 0x49, 0x4a, 0x40, 0x52, 0x3e, 0x4c, 0x49, 0x40, 0x44, 0x49, - 0x48, 0x3f, 0x45, 0x5b, 0x49, 0x4b, 0x4c, 0x44, 0x50, 0x4e, 0x4a, 0x4a, - 0x49, 0x4e, 0x4f, 0x47, 0x46, 0x4b, 0x44, 0x3b, 0x4e, 0x4b, 0x48, 0x46, - 0x45, 0x45, 0x3d, 0x35, 0x4c, 0x49, 0x54, 0x42, 0x51, 0x46, 0x49, 0x2d, - 0x43, 0x4a, 0x53, 0x49, 0x49, 0x42, 0x4f, 0x40, 0x4e, 0x50, 0x54, 0x51, - 0x4b, 0x45, 0x48, 0x35, 0x4d, 0x41, 0x51, 0x40, 0x41, 0x49, 0x4a, 0x3b, - 0x45, 0x50, 0x48, 0x51, 0x51, 0x4d, 0x4c, 0x36, 0x47, 0x4a, 0x44, 0x45, - 0x4d, 0x47, 0x43, 0x3a, 0x48, 0x40, 0x42, 0x4f, 0x4f, 0x4f, 0x4f, 0x43, - 0x4a, 0x41, 0x4b, 0x53, 0x43, 0x46, 0x4f, 0x39, 0x46, 0x4a, 0x4d, 0x53, - 0x41, 0x44, 0x4e, 0x44, 0x3f, 0x47, 0x4c, 0x4d, 0x4d, 0x43, 0x45, 0x3d, - 0x43, 0x4b, 0x3e, 0x48, 0x42, 0x4c, 0x47, 0x42, 0x42, 0x50, 0x49, 0x4b, - 0x43, 0x4e, 0x44, 0x44, 0x4c, 0x3d, 0x4c, 0x47, 0x4e, 0x42, 0x4b, 0x44, - 0x4b, 0x44, 0x3f, 0x49, 0x33, 0x46, 0x4a, 0x4a, 0x42, 0x57, 0x5e, 0x4a, - 0x46, 0x4f, 0x55, 0x3c, 0x4a, 0x4b, 0x4c, 0x43, 0x51, 0x59, 0x64, 0x51, - 0x45, 0x60, 0x4b, 0x65, 0x46, 0x4a, 0x4e, 0x49, 0x41, 0x4b, 0x50, 0x5c, - 0x48, 0x4b, 0x3e, 0x52, 0x4f, 0x2f, 0x4e, 0x4a, 0x45, 0x53, 0x48, 0x59, - 0x4c, 0x4e, 0x4a, 0x4d, 0x49, 0x40, 0x52, 0x44, 0x49, 0x46, 0x4e, 0x46, - 0x42, 0x4b, 0x4a, 0x4b, 0x4b, 0x4b, 0x4f, 0x52, 0x46, 0x50, 0x4d, 0x3d, - 0x46, 0x4b, 0x4b, 0x40, 0x4d, 0x3f, 0x43, 0x33, 0x4e, 0x53, 0x4b, 0x4a, - 0x45, 0x48, 0x4c, 0x2e, 0x48, 0x4f, 0x49, 0x42, 0x54, 0x4f, 0x4b, 0x2b, - 0x55, 0x4e, 0x43, 0x4d, 0x4d, 0x47, 0x42, 0x3e, 0x48, 0x48, 0x4d, 0x54, - 0x52, 0x4f, 0x43, 0x37, 0x4b, 0x42, 0x4b, 0x4e, 0x49, 0x49, 0x4b, 0x2e, - 0x45, 0x4e, 0x48, 0x4e, 0x44, 0x49, 0x48, 0x30, 0x4c, 0x4b, 0x3f, 0x42, - 0x4f, 0x4f, 0x4e, 0x38, 0x4f, 0x42, 0x54, 0x49, 0x41, 0x42, 0x45, 0x3a, - 0x47, 0x43, 0x43, 0x4b, 0x49, 0x40, 0x4d, 0x38, 0x52, 0x4c, 0x3d, 0x4d, - 0x43, 0x54, 0x4e, 0x41, 0x4a, 0x47, 0x44, 0x51, 0x47, 0x48, 0x41, 0x47, - 0x4d, 
0x41, 0x46, 0x4c, 0x4d, 0x46, 0x51, 0x4a, 0x49, 0x46, 0x4a, 0x42, - 0x3a, 0x43, 0x4a, 0x4b, 0x43, 0x4c, 0x68, 0x44, 0x4b, 0x52, 0x50, 0x37, - 0x4d, 0x4c, 0x57, 0x4c, 0x68, 0x62, 0x64, 0x4a, 0x3e, 0x64, 0x4b, 0x66, - 0x48, 0x4d, 0x54, 0x57, 0x4b, 0x52, 0x49, 0x5c, 0x4d, 0x55, 0x51, 0x57, - 0x4c, 0x3a, 0x48, 0x43, 0x3b, 0x43, 0x52, 0x5d, 0x45, 0x4e, 0x51, 0x4d, - 0x4a, 0x55, 0x4e, 0x4c, 0x44, 0x51, 0x4c, 0x4f, 0x41, 0x4f, 0x4a, 0x43, - 0x53, 0x48, 0x47, 0x49, 0x46, 0x52, 0x48, 0x3e, 0x4b, 0x4e, 0x4a, 0x50, - 0x4f, 0x47, 0x3e, 0x2e, 0x4b, 0x51, 0x4a, 0x44, 0x4c, 0x49, 0x4f, 0x26, - 0x48, 0x4f, 0x44, 0x51, 0x48, 0x3f, 0x4c, 0x30, 0x4e, 0x48, 0x4d, 0x48, - 0x48, 0x44, 0x4b, 0x2f, 0x50, 0x41, 0x4d, 0x50, 0x52, 0x42, 0x45, 0x33, - 0x4c, 0x48, 0x48, 0x3d, 0x46, 0x41, 0x43, 0x38, 0x45, 0x4f, 0x48, 0x4b, - 0x41, 0x49, 0x4c, 0x2f, 0x53, 0x4c, 0x48, 0x4a, 0x47, 0x40, 0x4a, 0x31, - 0x52, 0x40, 0x49, 0x4c, 0x3f, 0x48, 0x48, 0x39, 0x48, 0x3f, 0x45, 0x43, - 0x40, 0x48, 0x3c, 0x40, 0x4c, 0x48, 0x48, 0x4d, 0x3e, 0x42, 0x4a, 0x3d, - 0x4c, 0x45, 0x44, 0x46, 0x44, 0x45, 0x4a, 0x47, 0x52, 0x48, 0x4a, 0x4d, - 0x3f, 0x49, 0x4c, 0x4c, 0x48, 0x44, 0x4c, 0x44, 0x3d, 0x41, 0x47, 0x45, - 0x43, 0x4a, 0x5a, 0x3f, 0x48, 0x5d, 0x50, 0x35, 0x47, 0x4f, 0x5b, 0x46, - 0x6e, 0x50, 0x6d, 0x44, 0x49, 0x6a, 0x53, 0x6b, 0x4b, 0x4b, 0x4f, 0x62, - 0x45, 0x57, 0x48, 0x5b, 0x40, 0x4b, 0x4f, 0x63, 0x48, 0x3a, 0x4b, 0x42, - 0x43, 0x53, 0x41, 0x5f, 0x54, 0x3e, 0x4d, 0x43, 0x3d, 0x4c, 0x46, 0x46, - 0x49, 0x56, 0x4b, 0x45, 0x47, 0x45, 0x4e, 0x4f, 0x4c, 0x4d, 0x4f, 0x47, - 0x49, 0x4b, 0x51, 0x33, 0x4b, 0x45, 0x4d, 0x41, 0x51, 0x4a, 0x43, 0x2a, - 0x50, 0x4b, 0x4a, 0x4b, 0x4c, 0x52, 0x4c, 0x3b, 0x45, 0x4c, 0x51, 0x44, - 0x4c, 0x48, 0x43, 0x35, 0x51, 0x50, 0x48, 0x49, 0x3f, 0x48, 0x3d, 0x3b, - 0x52, 0x3f, 0x42, 0x4b, 0x49, 0x49, 0x47, 0x38, 0x4a, 0x4a, 0x41, 0x52, - 0x41, 0x3e, 0x4b, 0x2f, 0x46, 0x4d, 0x49, 0x44, 0x46, 0x3b, 0x47, 0x36, - 0x46, 0x3f, 0x49, 0x48, 0x47, 0x42, 0x42, 0x35, 0x44, 0x4b, 0x4d, 0x56, - 0x50, 
0x49, 0x43, 0x42, 0x4b, 0x3e, 0x53, 0x44, 0x4a, 0x43, 0x47, 0x38, - 0x4a, 0x45, 0x4d, 0x3f, 0x46, 0x4a, 0x47, 0x3a, 0x4c, 0x3e, 0x47, 0x45, - 0x46, 0x4b, 0x45, 0x49, 0x4a, 0x4b, 0x54, 0x49, 0x4a, 0x53, 0x4a, 0x4c, - 0x45, 0x48, 0x53, 0x42, 0x4b, 0x47, 0x4e, 0x50, 0x3d, 0x51, 0x60, 0x3e, - 0x53, 0x5d, 0x51, 0x30, 0x45, 0x50, 0x59, 0x4e, 0x62, 0x52, 0x68, 0x51, - 0x45, 0x6c, 0x4c, 0x64, 0x4d, 0x47, 0x55, 0x61, 0x44, 0x57, 0x44, 0x58, - 0x44, 0x4a, 0x53, 0x58, 0x47, 0x31, 0x3f, 0x4c, 0x43, 0x45, 0x48, 0x5e, - 0x41, 0x43, 0x3f, 0x43, 0x51, 0x46, 0x48, 0x4b, 0x4d, 0x5b, 0x45, 0x4b, - 0x48, 0x46, 0x3f, 0x45, 0x47, 0x45, 0x40, 0x4a, 0x51, 0x51, 0x3d, 0x3f, - 0x43, 0x45, 0x4d, 0x4a, 0x47, 0x50, 0x49, 0x32, 0x4c, 0x5a, 0x55, 0x4f, - 0x4c, 0x51, 0x43, 0x37, 0x40, 0x59, 0x49, 0x49, 0x4e, 0x4f, 0x47, 0x34, - 0x40, 0x4c, 0x4a, 0x41, 0x4a, 0x47, 0x4a, 0x42, 0x4e, 0x4a, 0x48, 0x4e, - 0x4e, 0x4e, 0x45, 0x39, 0x4e, 0x45, 0x45, 0x4e, 0x4c, 0x48, 0x4a, 0x35, - 0x45, 0x4c, 0x49, 0x4f, 0x51, 0x43, 0x3c, 0x3a, 0x4a, 0x4a, 0x46, 0x48, - 0x49, 0x42, 0x4e, 0x2f, 0x42, 0x4e, 0x45, 0x50, 0x51, 0x40, 0x45, 0x32, - 0x4a, 0x4d, 0x44, 0x4e, 0x48, 0x48, 0x47, 0x2f, 0x48, 0x4b, 0x49, 0x44, - 0x48, 0x4d, 0x46, 0x3b, 0x46, 0x4a, 0x41, 0x4e, 0x4e, 0x47, 0x54, 0x4b, - 0x45, 0x49, 0x45, 0x44, 0x45, 0x48, 0x4a, 0x46, 0x55, 0x49, 0x47, 0x49, - 0x4b, 0x42, 0x48, 0x4f, 0x3f, 0x52, 0x60, 0x39, 0x4b, 0x5e, 0x55, 0x2e, - 0x48, 0x50, 0x59, 0x4f, 0x68, 0x5f, 0x64, 0x4f, 0x3b, 0x71, 0x50, 0x63, - 0x4f, 0x50, 0x50, 0x6c, 0x4b, 0x55, 0x47, 0x5b, 0x4c, 0x40, 0x48, 0x59, - 0x4f, 0x2e, 0x4b, 0x4c, 0x4e, 0x4e, 0x46, 0x61, 0x50, 0x41, 0x4c, 0x4a, - 0x44, 0x3e, 0x3f, 0x47, 0x4b, 0x4f, 0x47, 0x4b, 0x47, 0x3d, 0x41, 0x49, - 0x49, 0x3f, 0x4d, 0x44, 0x4a, 0x4d, 0x45, 0x41, 0x4d, 0x43, 0x49, 0x3c, - 0x49, 0x57, 0x49, 0x3b, 0x49, 0x59, 0x3f, 0x4f, 0x4e, 0x49, 0x4e, 0x46, - 0x52, 0x4e, 0x4c, 0x54, 0x4a, 0x48, 0x48, 0x3a, 0x44, 0x4a, 0x4f, 0x4a, - 0x44, 0x4b, 0x43, 0x4d, 0x51, 0x42, 0x53, 0x4d, 0x52, 0x41, 0x4d, 0x43, - 0x4e, 
0x54, 0x4b, 0x42, 0x4b, 0x3f, 0x53, 0x45, 0x3f, 0x4a, 0x45, 0x50, - 0x3f, 0x4c, 0x4f, 0x43, 0x46, 0x42, 0x4b, 0x4d, 0x4c, 0x3b, 0x48, 0x40, - 0x4e, 0x4e, 0x49, 0x46, 0x4d, 0x4d, 0x52, 0x40, 0x4e, 0x4f, 0x46, 0x4a, - 0x40, 0x4b, 0x4c, 0x40, 0x4f, 0x4a, 0x44, 0x41, 0x46, 0x3c, 0x40, 0x3d, - 0x44, 0x48, 0x4a, 0x50, 0x46, 0x53, 0x46, 0x40, 0x44, 0x3e, 0x47, 0x43, - 0x48, 0x3d, 0x4e, 0x3e, 0x48, 0x49, 0x4b, 0x49, 0x4c, 0x3e, 0x4c, 0x4a, - 0x46, 0x4e, 0x62, 0x3c, 0x59, 0x60, 0x51, 0x29, 0x47, 0x52, 0x59, 0x4c, - 0x67, 0x68, 0x68, 0x4e, 0x3b, 0x72, 0x4d, 0x68, 0x44, 0x4f, 0x53, 0x63, - 0x47, 0x5a, 0x45, 0x4f, 0x4b, 0x37, 0x43, 0x5b, 0x4b, 0x3d, 0x44, 0x41, - 0x4a, 0x4b, 0x3c, 0x64, 0x48, 0x38, 0x42, 0x3f, 0x48, 0x46, 0x4b, 0x46, - 0x46, 0x4f, 0x46, 0x46, 0x44, 0x3c, 0x4b, 0x4f, 0x4d, 0x4a, 0x4b, 0x46, - 0x4d, 0x4f, 0x4f, 0x3f, 0x3a, 0x4b, 0x55, 0x3c, 0x51, 0x56, 0x4d, 0x42, - 0x52, 0x5a, 0x3e, 0x4b, 0x54, 0x57, 0x4e, 0x4d, 0x4e, 0x5b, 0x4e, 0x49, - 0x4e, 0x3c, 0x40, 0x41, 0x40, 0x4d, 0x48, 0x42, 0x49, 0x4e, 0x4f, 0x47, - 0x47, 0x48, 0x50, 0x49, 0x51, 0x46, 0x44, 0x45, 0x49, 0x46, 0x43, 0x48, - 0x48, 0x49, 0x4d, 0x4c, 0x45, 0x4f, 0x4c, 0x45, 0x44, 0x40, 0x49, 0x45, - 0x49, 0x51, 0x4b, 0x4b, 0x50, 0x4b, 0x48, 0x3d, 0x4e, 0x52, 0x4a, 0x47, - 0x49, 0x41, 0x55, 0x3d, 0x48, 0x4d, 0x49, 0x48, 0x4e, 0x4c, 0x48, 0x3d, - 0x3f, 0x4c, 0x4e, 0x53, 0x3e, 0x48, 0x4a, 0x3f, 0x54, 0x4d, 0x54, 0x4b, - 0x47, 0x4e, 0x44, 0x48, 0x49, 0x4b, 0x4c, 0x49, 0x4d, 0x42, 0x52, 0x4b, - 0x40, 0x3e, 0x54, 0x49, 0x55, 0x45, 0x47, 0x4d, 0x45, 0x5c, 0x60, 0x40, - 0x57, 0x60, 0x5b, 0x27, 0x4a, 0x5a, 0x64, 0x53, 0x6a, 0x5a, 0x5f, 0x52, - 0x3a, 0x72, 0x4b, 0x5f, 0x45, 0x56, 0x5f, 0x5f, 0x54, 0x5f, 0x39, 0x52, - 0x51, 0x3e, 0x3b, 0x5a, 0x44, 0x32, 0x46, 0x50, 0x3a, 0x4f, 0x44, 0x5d, - 0x4c, 0x41, 0x39, 0x3f, 0x45, 0x46, 0x3b, 0x43, 0x46, 0x51, 0x3c, 0x4c, - 0x4b, 0x43, 0x4b, 0x51, 0x43, 0x48, 0x4d, 0x43, 0x38, 0x46, 0x46, 0x43, - 0x44, 0x4a, 0x46, 0x49, 0x48, 0x50, 0x4e, 0x4a, 0x4e, 0x58, 0x4a, 0x49, - 0x48, 
0x4f, 0x4a, 0x49, 0x41, 0x57, 0x51, 0x50, 0x4b, 0x48, 0x47, 0x4b, - 0x53, 0x3d, 0x4b, 0x4c, 0x4b, 0x4b, 0x55, 0x56, 0x45, 0x49, 0x46, 0x4c, - 0x45, 0x51, 0x47, 0x50, 0x40, 0x4b, 0x4f, 0x4b, 0x4d, 0x4a, 0x4f, 0x50, - 0x49, 0x53, 0x50, 0x46, 0x40, 0x48, 0x4a, 0x4a, 0x49, 0x4a, 0x42, 0x45, - 0x4b, 0x45, 0x42, 0x45, 0x4e, 0x4e, 0x44, 0x41, 0x4b, 0x4a, 0x49, 0x3f, - 0x41, 0x51, 0x48, 0x4c, 0x40, 0x41, 0x51, 0x42, 0x49, 0x49, 0x48, 0x42, - 0x48, 0x4c, 0x4b, 0x3c, 0x49, 0x45, 0x42, 0x49, 0x4c, 0x46, 0x45, 0x43, - 0x43, 0x48, 0x48, 0x41, 0x43, 0x42, 0x4c, 0x4b, 0x40, 0x45, 0x44, 0x46, - 0x4c, 0x4b, 0x4e, 0x4d, 0x3f, 0x59, 0x55, 0x41, 0x56, 0x5a, 0x51, 0x30, - 0x49, 0x5a, 0x63, 0x4d, 0x61, 0x5b, 0x64, 0x55, 0x34, 0x7a, 0x4c, 0x62, - 0x3e, 0x5d, 0x56, 0x60, 0x48, 0x61, 0x3f, 0x54, 0x46, 0x40, 0x42, 0x56, - 0x52, 0x35, 0x4c, 0x59, 0x45, 0x4c, 0x42, 0x60, 0x49, 0x3f, 0x4c, 0x3c, - 0x52, 0x36, 0x46, 0x3d, 0x58, 0x4b, 0x41, 0x48, 0x3e, 0x45, 0x4e, 0x54, - 0x4c, 0x56, 0x47, 0x44, 0x39, 0x4a, 0x4a, 0x4a, 0x46, 0x48, 0x4a, 0x48, - 0x51, 0x4f, 0x4b, 0x49, 0x45, 0x4b, 0x44, 0x4c, 0x3e, 0x4c, 0x42, 0x59, - 0x47, 0x55, 0x47, 0x47, 0x41, 0x44, 0x44, 0x4a, 0x44, 0x4b, 0x44, 0x46, - 0x49, 0x5a, 0x48, 0x5d, 0x4f, 0x4a, 0x47, 0x50, 0x48, 0x4e, 0x44, 0x57, - 0x49, 0x46, 0x42, 0x4d, 0x3d, 0x4a, 0x4a, 0x58, 0x41, 0x4d, 0x3c, 0x47, - 0x42, 0x4e, 0x4d, 0x49, 0x44, 0x4b, 0x4c, 0x4b, 0x53, 0x42, 0x4a, 0x46, - 0x4e, 0x56, 0x4b, 0x47, 0x50, 0x43, 0x4f, 0x48, 0x49, 0x50, 0x48, 0x50, - 0x42, 0x4c, 0x4e, 0x3c, 0x41, 0x4f, 0x4a, 0x41, 0x44, 0x47, 0x4c, 0x42, - 0x51, 0x4f, 0x53, 0x46, 0x4c, 0x4b, 0x48, 0x51, 0x47, 0x4b, 0x4c, 0x4d, - 0x4d, 0x49, 0x3d, 0x44, 0x4b, 0x42, 0x43, 0x49, 0x51, 0x47, 0x4c, 0x4b, - 0x4a, 0x50, 0x5b, 0x43, 0x5b, 0x68, 0x54, 0x31, 0x4c, 0x5d, 0x5c, 0x54, - 0x63, 0x5a, 0x61, 0x54, 0x3d, 0x7a, 0x51, 0x5b, 0x40, 0x59, 0x5a, 0x62, - 0x4c, 0x5e, 0x42, 0x58, 0x49, 0x3c, 0x38, 0x50, 0x54, 0x37, 0x42, 0x51, - 0x4d, 0x4f, 0x42, 0x68, 0x4a, 0x40, 0x4e, 0x40, 0x3f, 0x3e, 0x3f, 0x40, - 0x54, 
0x52, 0x3e, 0x43, 0x46, 0x4a, 0x48, 0x51, 0x4e, 0x4d, 0x42, 0x47, - 0x3f, 0x51, 0x47, 0x44, 0x3f, 0x4c, 0x46, 0x47, 0x4f, 0x55, 0x4b, 0x4e, - 0x4c, 0x51, 0x40, 0x51, 0x47, 0x4a, 0x44, 0x5c, 0x48, 0x54, 0x4b, 0x46, - 0x49, 0x4b, 0x53, 0x59, 0x43, 0x3e, 0x45, 0x4e, 0x4f, 0x58, 0x4b, 0x64, - 0x41, 0x4b, 0x45, 0x4a, 0x4c, 0x51, 0x47, 0x57, 0x45, 0x46, 0x43, 0x4f, - 0x4d, 0x4d, 0x49, 0x58, 0x4b, 0x52, 0x43, 0x4b, 0x45, 0x4c, 0x50, 0x4c, - 0x4e, 0x4b, 0x40, 0x4c, 0x44, 0x4e, 0x4c, 0x47, 0x41, 0x55, 0x45, 0x4a, - 0x4c, 0x48, 0x46, 0x41, 0x47, 0x52, 0x44, 0x4f, 0x48, 0x49, 0x4b, 0x47, - 0x50, 0x4f, 0x42, 0x4a, 0x44, 0x4b, 0x52, 0x43, 0x45, 0x4e, 0x46, 0x49, - 0x45, 0x52, 0x51, 0x45, 0x44, 0x41, 0x4c, 0x46, 0x4c, 0x4b, 0x44, 0x4d, - 0x4f, 0x48, 0x44, 0x4d, 0x56, 0x48, 0x50, 0x4f, 0x3b, 0x4e, 0x55, 0x43, - 0x52, 0x62, 0x57, 0x2c, 0x4d, 0x5e, 0x5e, 0x50, 0x64, 0x5b, 0x6a, 0x55, - 0x39, 0x7d, 0x4b, 0x5e, 0x43, 0x54, 0x5d, 0x5c, 0x4d, 0x5c, 0x42, 0x51, - 0x4c, 0x3d, 0x46, 0x51, 0x4c, 0x2a, 0x3e, 0x54, 0x47, 0x48, 0x46, 0x64, - 0x42, 0x3d, 0x47, 0x3f, 0x42, 0x45, 0x49, 0x3b, 0x59, 0x50, 0x4c, 0x46, - 0x4d, 0x44, 0x47, 0x4d, 0x4a, 0x50, 0x41, 0x48, 0x43, 0x50, 0x3e, 0x44, - 0x4b, 0x53, 0x48, 0x49, 0x51, 0x51, 0x4d, 0x57, 0x49, 0x4f, 0x53, 0x50, - 0x46, 0x4f, 0x41, 0x5d, 0x47, 0x46, 0x49, 0x51, 0x45, 0x41, 0x4a, 0x56, - 0x4f, 0x4e, 0x4d, 0x4a, 0x3e, 0x55, 0x47, 0x65, 0x48, 0x51, 0x4d, 0x4e, - 0x46, 0x43, 0x48, 0x5b, 0x48, 0x4f, 0x4f, 0x48, 0x4b, 0x4d, 0x4e, 0x5c, - 0x4f, 0x4c, 0x54, 0x48, 0x4a, 0x4d, 0x4e, 0x4e, 0x44, 0x48, 0x43, 0x52, - 0x41, 0x52, 0x48, 0x4f, 0x46, 0x4f, 0x51, 0x41, 0x44, 0x45, 0x41, 0x4b, - 0x43, 0x4e, 0x4e, 0x42, 0x48, 0x41, 0x45, 0x43, 0x44, 0x43, 0x4c, 0x4c, - 0x51, 0x54, 0x4c, 0x32, 0x46, 0x52, 0x4e, 0x49, 0x40, 0x4d, 0x43, 0x4f, - 0x4a, 0x4d, 0x4d, 0x49, 0x46, 0x4c, 0x41, 0x4d, 0x41, 0x3a, 0x50, 0x4c, - 0x5a, 0x4e, 0x49, 0x53, 0x4d, 0x53, 0x53, 0x3d, 0x52, 0x64, 0x55, 0x2a, - 0x47, 0x5d, 0x61, 0x51, 0x5b, 0x5d, 0x66, 0x52, 0x3f, 0xfd, 0x55, 0x5a, - 0x4b, 
0x54, 0x5b, 0x60, 0x49, 0x5d, 0x43, 0x57, 0x47, 0x41, 0x45, 0x5e, - 0x4c, 0x28, 0x3e, 0x40, 0x49, 0x4e, 0x40, 0x69, 0x4a, 0x44, 0x45, 0x43, - 0x45, 0x3d, 0x39, 0x40, 0x4c, 0x53, 0x4b, 0x3d, 0x4e, 0x43, 0x48, 0x55, - 0x4d, 0x50, 0x4d, 0x49, 0x4f, 0x48, 0x3e, 0x46, 0x47, 0x56, 0x40, 0x48, - 0x46, 0x53, 0x50, 0x5d, 0x43, 0x54, 0x49, 0x47, 0x49, 0x4c, 0x48, 0x5d, - 0x49, 0x51, 0x50, 0x3d, 0x41, 0x47, 0x48, 0x64, 0x4b, 0x44, 0x49, 0x41, - 0x54, 0x48, 0x3d, 0x6b, 0x4c, 0x5a, 0x48, 0x4e, 0x40, 0x4c, 0x52, 0x5f, - 0x54, 0x4a, 0x3f, 0x48, 0x43, 0x43, 0x44, 0x66, 0x49, 0x47, 0x43, 0x46, - 0x47, 0x54, 0x42, 0x54, 0x4b, 0x4e, 0x49, 0x49, 0x49, 0x4b, 0x52, 0x4f, - 0x43, 0x46, 0x4b, 0x49, 0x54, 0x4b, 0x40, 0x48, 0x47, 0x4a, 0x46, 0x47, - 0x44, 0x47, 0x4c, 0x37, 0x3f, 0x49, 0x45, 0x44, 0x50, 0x49, 0x44, 0x36, - 0x4d, 0x40, 0x45, 0x49, 0x53, 0x55, 0x44, 0x42, 0x47, 0x48, 0x46, 0x40, - 0x4f, 0x4c, 0x41, 0x42, 0x52, 0x3a, 0x43, 0x46, 0x55, 0x51, 0x4e, 0x4f, - 0x48, 0x51, 0x55, 0x48, 0x52, 0x66, 0x4e, 0x33, 0x49, 0x5b, 0x5f, 0x4b, - 0x5f, 0x5b, 0x66, 0x52, 0x41, 0x7c, 0x4a, 0x59, 0x47, 0x59, 0x58, 0x67, - 0x49, 0x5e, 0x44, 0x57, 0x49, 0x4c, 0x43, 0x56, 0x41, 0x27, 0x4c, 0x44, - 0x51, 0x44, 0x42, 0x65, 0x49, 0x44, 0x40, 0x3d, 0x4d, 0x3e, 0x4c, 0x3c, - 0x4f, 0x4b, 0x45, 0x44, 0x4d, 0x48, 0x47, 0x54, 0x4d, 0x4e, 0x44, 0x42, - 0x47, 0x44, 0x3d, 0x49, 0x4e, 0x50, 0x49, 0x45, 0x58, 0x4a, 0x54, 0x5c, - 0x41, 0x49, 0x4f, 0x42, 0x44, 0x4f, 0x4a, 0x62, 0x48, 0x50, 0x48, 0x43, - 0x51, 0x53, 0x47, 0x6c, 0x40, 0x46, 0x3d, 0x46, 0x4a, 0x50, 0x43, 0x69, - 0x49, 0x4f, 0x4a, 0x4c, 0x49, 0x46, 0x43, 0x6a, 0x48, 0x50, 0x49, 0x48, - 0x48, 0x51, 0x4b, 0x65, 0x42, 0x4b, 0x4d, 0x48, 0x44, 0x4e, 0x49, 0x60, - 0x44, 0x52, 0x42, 0x42, 0x47, 0x48, 0x4b, 0x51, 0x50, 0x4b, 0x3c, 0x4d, - 0x4c, 0x44, 0x48, 0x55, 0x51, 0x4c, 0x55, 0x4e, 0x52, 0x4c, 0x4b, 0x39, - 0x48, 0x42, 0x49, 0x49, 0x49, 0x50, 0x49, 0x32, 0x4e, 0x4b, 0x45, 0x4f, - 0x42, 0x4b, 0x47, 0x50, 0x48, 0x45, 0x54, 0x49, 0x4c, 0x46, 0x40, 0x46, - 0x43, 
0x3d, 0x51, 0x44, 0x53, 0x4f, 0x54, 0x55, 0x43, 0x4f, 0x5b, 0x47, - 0x53, 0x6c, 0x57, 0x2e, 0x50, 0x55, 0x5a, 0x4d, 0x57, 0x5d, 0x70, 0x50, - 0x3f, 0x79, 0x4a, 0x5a, 0x4c, 0x58, 0x59, 0x63, 0x45, 0x69, 0x48, 0x58, - 0x42, 0x4b, 0x43, 0x5c, 0x46, 0x28, 0x48, 0x49, 0x4c, 0x3f, 0x45, 0x58, - 0x45, 0x44, 0x47, 0x40, 0x4c, 0x42, 0x3e, 0x37, 0x45, 0x54, 0x48, 0x3b, - 0x4e, 0x48, 0x43, 0x4a, 0x50, 0x4a, 0x49, 0x46, 0x4c, 0x54, 0x3f, 0x4b, - 0x4e, 0x56, 0x48, 0x49, 0x49, 0x4c, 0x51, 0x5f, 0x4d, 0x4b, 0x43, 0x4d, - 0x47, 0x51, 0x43, 0x59, 0x45, 0x4e, 0x4f, 0x45, 0x44, 0x54, 0x44, 0x6d, - 0x47, 0x51, 0x43, 0x4e, 0x4c, 0x4f, 0x43, 0x6d, 0x48, 0x53, 0x4b, 0x47, - 0x49, 0x48, 0x46, 0x6a, 0x51, 0x4c, 0x4d, 0x45, 0x4e, 0x47, 0x46, 0x62, - 0x4a, 0x54, 0x51, 0x4c, 0x47, 0x4d, 0x4a, 0x61, 0x3d, 0x50, 0x4c, 0x4c, - 0x45, 0x3f, 0x3e, 0x54, 0x3d, 0x53, 0x48, 0x47, 0x52, 0x4b, 0x47, 0x51, - 0x4f, 0x45, 0x4b, 0x4a, 0x4c, 0x46, 0x44, 0x37, 0x42, 0x50, 0x49, 0x4f, - 0x51, 0x41, 0x44, 0x38, 0x54, 0x40, 0x51, 0x52, 0x3e, 0x43, 0x44, 0x47, - 0x49, 0x4b, 0x4b, 0x46, 0x53, 0x54, 0x55, 0x4b, 0x4a, 0x37, 0x43, 0x4a, - 0x51, 0x47, 0x51, 0x54, 0x43, 0x46, 0x56, 0x3d, 0x54, 0x66, 0x4f, 0x30, - 0x45, 0x52, 0x5a, 0x43, 0x5c, 0x65, 0x5d, 0x52, 0x32, 0x77, 0x53, 0x5f, - 0x4a, 0x5a, 0x4f, 0x5e, 0x4e, 0x61, 0x4b, 0x5b, 0x4a, 0x53, 0x3e, 0x61, - 0x47, 0x24, 0x3e, 0x48, 0x4d, 0x43, 0x40, 0x53, 0x4e, 0x41, 0x43, 0x3d, - 0x50, 0x49, 0x41, 0x3a, 0x4e, 0x4b, 0x48, 0x49, 0x48, 0x49, 0x46, 0x50, - 0x4f, 0x4b, 0x47, 0x4b, 0x48, 0x52, 0x3e, 0x4d, 0x4d, 0x59, 0x4c, 0x3e, - 0x52, 0x49, 0x4f, 0x5e, 0x54, 0x59, 0x47, 0x4d, 0x40, 0x4c, 0x4b, 0x64, - 0x42, 0x4c, 0x53, 0x46, 0x4e, 0x50, 0x46, 0x6a, 0x41, 0x59, 0x44, 0x4b, - 0x4f, 0x44, 0x52, 0x6c, 0x54, 0x4e, 0x46, 0x48, 0x42, 0x3d, 0x44, 0x67, - 0x44, 0x4f, 0x47, 0x54, 0x4c, 0x4f, 0x43, 0x61, 0x4c, 0x54, 0x4f, 0x43, - 0x49, 0x40, 0x4a, 0x5f, 0x4a, 0x52, 0x47, 0x43, 0x4c, 0x43, 0x49, 0x53, - 0x4c, 0x4b, 0x43, 0x3d, 0x4e, 0x45, 0x49, 0x50, 0x44, 0x53, 0x4f, 0x48, - 0x4b, 
0x46, 0x44, 0x3c, 0x50, 0x42, 0x43, 0x40, 0x47, 0x43, 0x42, 0x34, - 0x47, 0x42, 0x3f, 0x4a, 0x48, 0x42, 0x48, 0x4c, 0x42, 0x4c, 0x4e, 0x47, - 0x48, 0x47, 0x51, 0x51, 0x4d, 0x3d, 0x3e, 0x4b, 0x54, 0x4c, 0x4c, 0x59, - 0x4f, 0x50, 0x57, 0x3c, 0x54, 0x62, 0x54, 0x35, 0x3d, 0x5a, 0x5b, 0x47, - 0x59, 0x63, 0x66, 0x4d, 0x3c, 0x79, 0x50, 0x5f, 0x45, 0x58, 0x4e, 0x5d, - 0x48, 0x61, 0x43, 0x54, 0x47, 0x54, 0x4d, 0x54, 0x4b, 0x25, 0x41, 0x44, - 0x4c, 0x4a, 0x3b, 0x52, 0x47, 0x3c, 0x45, 0x3c, 0x53, 0x44, 0x44, 0x40, - 0x50, 0x4c, 0x45, 0x3a, 0x4c, 0x51, 0x44, 0x49, 0x4d, 0x52, 0x4d, 0x4b, - 0x45, 0x52, 0x3d, 0x50, 0x4a, 0x58, 0x4a, 0x47, 0x4d, 0x47, 0x4e, 0x52, - 0x4f, 0x4d, 0x4f, 0x49, 0x52, 0x52, 0x4c, 0x5e, 0x47, 0x4d, 0x46, 0x4d, - 0x4c, 0x48, 0x50, 0x70, 0x41, 0x4a, 0x48, 0x3d, 0x45, 0x48, 0x45, 0x74, - 0x47, 0x4c, 0x43, 0x4f, 0x4a, 0x4a, 0x40, 0x68, 0x52, 0x49, 0x3e, 0x3e, - 0x4e, 0x4b, 0x4b, 0x69, 0x42, 0x4f, 0x45, 0x47, 0x3f, 0x45, 0x46, 0x56, - 0x45, 0x4a, 0x47, 0x44, 0x52, 0x4b, 0x53, 0x4e, 0x4e, 0x46, 0x45, 0x40, - 0x47, 0x4b, 0x53, 0x52, 0x53, 0x51, 0x4f, 0x46, 0x42, 0x43, 0x50, 0x3e, - 0x48, 0x4e, 0x41, 0x53, 0x4d, 0x48, 0x48, 0x33, 0x40, 0x43, 0x4b, 0x42, - 0x52, 0x4c, 0x42, 0x4e, 0x41, 0x4e, 0x4f, 0x50, 0x43, 0x49, 0x4d, 0x47, - 0x4a, 0x3a, 0x3f, 0x51, 0x51, 0x44, 0x4e, 0x54, 0x40, 0x55, 0x59, 0x3c, - 0x57, 0x67, 0x4e, 0x2e, 0x4c, 0x5b, 0x5b, 0x51, 0x58, 0x63, 0x62, 0x52, - 0x3c, 0x72, 0x51, 0x5a, 0x4e, 0x53, 0x4a, 0x5c, 0x51, 0x69, 0x42, 0x51, - 0x48, 0x54, 0x48, 0x57, 0x3e, 0x37, 0x3f, 0x4d, 0x4d, 0x4a, 0x35, 0x57, - 0x4e, 0x40, 0x45, 0x4a, 0x45, 0x4e, 0x49, 0x40, 0x49, 0x53, 0x51, 0x44, - 0x4a, 0x50, 0x4b, 0x4b, 0x50, 0x4f, 0x3e, 0x44, 0x45, 0x44, 0x4c, 0x51, - 0x47, 0x51, 0x46, 0x42, 0x48, 0x50, 0x49, 0x4d, 0x43, 0x54, 0x52, 0x4d, - 0x4e, 0x4f, 0x3f, 0x63, 0x54, 0x57, 0x41, 0x44, 0x4e, 0x50, 0x4e, 0x66, - 0x41, 0x53, 0x4b, 0x4d, 0x4e, 0x4f, 0x43, 0x6d, 0x4e, 0x51, 0x49, 0x4f, - 0x49, 0x4a, 0x4a, 0x6c, 0x4b, 0x4f, 0x3d, 0x47, 0x4d, 0x51, 0x3c, 0x66, - 0x4b, 
0x56, 0x3e, 0x4c, 0x41, 0x46, 0x45, 0x68, 0x47, 0x4b, 0x4a, 0x54, - 0x53, 0x48, 0x51, 0x59, 0x45, 0x43, 0x50, 0x45, 0x4f, 0x45, 0x42, 0x55, - 0x48, 0x52, 0x4c, 0x46, 0x52, 0x49, 0x47, 0x3d, 0x55, 0x48, 0x52, 0x52, - 0x40, 0x4e, 0x47, 0x31, 0x45, 0x4f, 0x42, 0x4a, 0x4e, 0x50, 0x42, 0x4a, - 0x49, 0x57, 0x46, 0x4b, 0x45, 0x4e, 0x4d, 0x46, 0x47, 0x43, 0x50, 0x4e, - 0x4f, 0x4c, 0x53, 0x55, 0x45, 0x51, 0x5b, 0x3a, 0x52, 0x64, 0x54, 0x2d, - 0x42, 0x59, 0x59, 0x45, 0x59, 0x67, 0x69, 0x53, 0x3f, 0x78, 0x50, 0x60, - 0x4c, 0x4c, 0x5b, 0x53, 0x45, 0x63, 0x49, 0x63, 0x51, 0x4c, 0x41, 0x4e, - 0x4b, 0x37, 0x45, 0x4e, 0x48, 0x4c, 0x39, 0x55, 0x44, 0x37, 0x3c, 0x49, - 0x44, 0x56, 0x3e, 0x40, 0x4d, 0x45, 0x4c, 0x43, 0x42, 0x41, 0x40, 0x42, - 0x57, 0x4f, 0x43, 0x3f, 0x52, 0x53, 0x51, 0x4b, 0x4b, 0x55, 0x46, 0x40, - 0x49, 0x45, 0x40, 0x4f, 0x47, 0x58, 0x4b, 0x53, 0x4e, 0x52, 0x54, 0x5e, - 0x4b, 0x51, 0x50, 0x44, 0x50, 0x4b, 0x4f, 0x70, 0x49, 0x4f, 0x4c, 0x50, - 0x45, 0x56, 0x4b, 0x6b, 0x49, 0x52, 0x4a, 0x3f, 0x44, 0x4b, 0x48, 0x72, - 0x4c, 0x47, 0x4e, 0x43, 0x46, 0x4c, 0x4f, 0x61, 0x4a, 0x52, 0x52, 0x46, - 0x4a, 0x4d, 0x46, 0x65, 0x48, 0x4e, 0x4d, 0x4e, 0x46, 0x4e, 0x53, 0x59, - 0x43, 0x49, 0x43, 0x47, 0x45, 0x47, 0x53, 0x50, 0x3e, 0x4d, 0x41, 0x46, - 0x4c, 0x4a, 0x4c, 0x35, 0x3f, 0x4f, 0x50, 0x48, 0x47, 0x4d, 0x4c, 0x32, - 0x45, 0x53, 0x43, 0x4d, 0x4e, 0x4a, 0x3e, 0x4b, 0x55, 0x4f, 0x53, 0x4c, - 0x4a, 0x4d, 0x48, 0x53, 0x4f, 0x3a, 0x47, 0x4b, 0x4e, 0x4e, 0x51, 0x59, - 0x41, 0x50, 0x57, 0x38, 0x5d, 0x63, 0x59, 0x2b, 0x45, 0x53, 0x5a, 0x4e, - 0x5c, 0x60, 0x5e, 0x4c, 0x41, 0x6f, 0x53, 0x5c, 0x48, 0x53, 0x56, 0x54, - 0x4b, 0x62, 0x46, 0x63, 0x47, 0x4e, 0x40, 0x51, 0x43, 0x36, 0x44, 0x42, - 0x46, 0x51, 0x41, 0x54, 0x4e, 0x36, 0x40, 0x4b, 0x55, 0x49, 0x40, 0x3f, - 0x4b, 0x42, 0x4a, 0x4a, 0x48, 0x47, 0x40, 0x43, 0x4d, 0x4f, 0x55, 0x3f, - 0x53, 0x42, 0x4d, 0x56, 0x49, 0x51, 0x4f, 0x41, 0x3b, 0x48, 0x43, 0x4e, - 0x4b, 0x5c, 0x4f, 0x45, 0x4a, 0x4c, 0x46, 0x66, 0x43, 0x45, 0x46, 0x48, - 0x4f, 
0x4e, 0x40, 0x71, 0x4b, 0x4e, 0x3e, 0x42, 0x4d, 0x52, 0x42, 0x71, - 0x4c, 0x54, 0x4f, 0x3f, 0x4c, 0x43, 0x4a, 0x73, 0x48, 0x48, 0x4c, 0x4b, - 0x4c, 0x4d, 0x40, 0x72, 0x3e, 0x51, 0x49, 0x48, 0x52, 0x53, 0x45, 0x65, - 0x52, 0x4e, 0x4f, 0x44, 0x4c, 0x43, 0x4a, 0x5e, 0x3e, 0x56, 0x46, 0x55, - 0x55, 0x43, 0x49, 0x51, 0x4f, 0x52, 0x49, 0x4d, 0x46, 0x47, 0x49, 0x3e, - 0x51, 0x49, 0x41, 0x53, 0x42, 0x47, 0x46, 0x3b, 0x4d, 0x4e, 0x48, 0x44, - 0x42, 0x48, 0x4c, 0x47, 0x42, 0x4e, 0x4a, 0x3e, 0x44, 0x54, 0x4a, 0x4d, - 0x49, 0x41, 0x41, 0x53, 0x52, 0x4c, 0x4c, 0x56, 0x49, 0x4a, 0x5a, 0x3f, - 0x5b, 0x5c, 0x59, 0x2f, 0x49, 0x52, 0x5a, 0x4e, 0x5a, 0x61, 0x67, 0x4c, - 0x41, 0x6f, 0x5a, 0x5a, 0x40, 0x5a, 0x54, 0x4e, 0x49, 0x66, 0x45, 0x5a, - 0x4a, 0x45, 0x44, 0x4b, 0x44, 0x36, 0x41, 0x4c, 0x45, 0x44, 0x3d, 0x51, - 0x3f, 0x35, 0x3c, 0x46, 0x53, 0x5c, 0x3f, 0x3e, 0x50, 0x43, 0x46, 0x4b, - 0x40, 0x54, 0x41, 0x47, 0x4b, 0x51, 0x41, 0x46, 0x4a, 0x4d, 0x51, 0x52, - 0x43, 0x58, 0x45, 0x46, 0x4e, 0x46, 0x4a, 0x4b, 0x44, 0x54, 0x4c, 0x4c, - 0x43, 0x59, 0x48, 0x61, 0x4e, 0x4f, 0x4d, 0x4d, 0x4a, 0x52, 0x4c, 0x6e, - 0x49, 0x57, 0x48, 0x4d, 0x46, 0x46, 0x4d, 0x72, 0x4a, 0x4e, 0x47, 0x44, - 0x49, 0x4f, 0x48, 0x73, 0x42, 0x40, 0x4d, 0x44, 0x4d, 0x57, 0x3e, 0x69, - 0x50, 0x52, 0x4c, 0x55, 0x46, 0x4c, 0x44, 0x5f, 0x4b, 0x4d, 0x55, 0x4c, - 0x48, 0x49, 0x4a, 0x5e, 0x47, 0x4b, 0x45, 0x53, 0x55, 0x53, 0x4d, 0x53, - 0x47, 0x5c, 0x45, 0x4e, 0x4e, 0x52, 0x4c, 0x39, 0x4b, 0x4c, 0x49, 0x46, - 0x4a, 0x4e, 0x4b, 0x33, 0x46, 0x47, 0x52, 0x41, 0x49, 0x4b, 0x4c, 0x48, - 0x51, 0x53, 0x44, 0x4c, 0x4a, 0x45, 0x46, 0x49, 0x49, 0x4b, 0x50, 0x47, - 0x4d, 0x4b, 0x4c, 0x4f, 0x44, 0x45, 0x58, 0x3c, 0x56, 0x5a, 0x56, 0x23, - 0x4f, 0x4d, 0x5c, 0x4e, 0x59, 0x5a, 0x65, 0x43, 0x45, 0x66, 0x54, 0x5f, - 0x45, 0x5e, 0x54, 0x4f, 0x48, 0x5f, 0x44, 0x59, 0x48, 0x46, 0x47, 0x49, - 0x4d, 0x3c, 0x49, 0x54, 0x3e, 0x48, 0x43, 0x5b, 0x4a, 0x35, 0x41, 0x43, - 0x4b, 0x55, 0x43, 0x38, 0x46, 0x42, 0x4a, 0x4e, 0x54, 0x4b, 0x4d, 0x46, - 0x43, 
0x4e, 0x44, 0x47, 0x56, 0x4c, 0x51, 0x57, 0x41, 0x4d, 0x43, 0x41, - 0x51, 0x47, 0x41, 0x51, 0x51, 0x4f, 0x46, 0x50, 0x52, 0x4e, 0x4d, 0x60, - 0x41, 0x49, 0x46, 0x50, 0x48, 0x56, 0x42, 0x6d, 0x40, 0x45, 0x44, 0x55, - 0x40, 0x4e, 0x40, 0x7c, 0x47, 0x5a, 0x44, 0x44, 0x45, 0x56, 0x55, 0x71, - 0x47, 0x4b, 0x4b, 0x45, 0x4f, 0x54, 0x4c, 0x73, 0x48, 0x55, 0x44, 0x4d, - 0x4a, 0x47, 0x49, 0x5e, 0x4d, 0x52, 0x4e, 0x4c, 0x48, 0x52, 0x48, 0x58, - 0x4c, 0x5a, 0x49, 0x4b, 0x53, 0x46, 0x4d, 0x4b, 0x48, 0x53, 0x41, 0x49, - 0x4a, 0x56, 0x51, 0x3a, 0x4c, 0x4e, 0x4f, 0x51, 0x4c, 0x59, 0x47, 0x45, - 0x4f, 0x50, 0x4a, 0x4f, 0x4d, 0x3f, 0x44, 0x4e, 0x42, 0x4a, 0x4a, 0x43, - 0x46, 0x4e, 0x4c, 0x4f, 0x47, 0x47, 0x4c, 0x4b, 0x52, 0x50, 0x50, 0x4b, - 0x42, 0x45, 0x54, 0x44, 0x54, 0x59, 0x4c, 0x2b, 0x4d, 0x4c, 0x55, 0x4e, - 0x5c, 0x5b, 0x5a, 0x42, 0x47, 0x5e, 0x56, 0x59, 0x47, 0x65, 0x55, 0x4c, - 0x4c, 0x59, 0x42, 0x5a, 0x4e, 0x46, 0x4e, 0x4b, 0x53, 0x46, 0x49, 0x56, - 0x48, 0x58, 0x4b, 0x4f, 0x45, 0x38, 0x40, 0x44, 0x49, 0x51, 0x4a, 0x3b, - 0x53, 0x40, 0x40, 0x48, 0x51, 0x49, 0x44, 0x46, 0x52, 0x4b, 0x4e, 0x45, - 0x48, 0x5a, 0x4e, 0x57, 0x44, 0x53, 0x49, 0x40, 0x4c, 0x47, 0x41, 0x4f, - 0x49, 0x55, 0x46, 0x50, 0x57, 0x5b, 0x48, 0x66, 0x50, 0x49, 0x51, 0x55, - 0x55, 0x4f, 0x47, 0x72, 0x49, 0x4f, 0x41, 0x4c, 0x49, 0x42, 0x48, 0x75, - 0x4a, 0x55, 0x45, 0x4a, 0x41, 0x51, 0x41, 0x70, 0x47, 0x49, 0x42, 0x52, - 0x4f, 0x47, 0x46, 0x63, 0x4f, 0x53, 0x46, 0x4f, 0x49, 0x53, 0x52, 0x63, - 0x4c, 0x59, 0x46, 0x41, 0x49, 0x51, 0x3e, 0x53, 0x45, 0x52, 0x51, 0x40, - 0x4f, 0x4c, 0x41, 0x4c, 0x47, 0x4a, 0x46, 0x47, 0x53, 0x47, 0x48, 0x39, - 0x53, 0x4b, 0x46, 0x4b, 0x50, 0x4c, 0x41, 0x40, 0x48, 0x4e, 0x49, 0x4e, - 0x44, 0x53, 0x44, 0x4e, 0x53, 0x49, 0x49, 0x4e, 0x46, 0x3f, 0x45, 0x42, - 0x4c, 0x47, 0x42, 0x4e, 0x49, 0x4a, 0x49, 0x44, 0x51, 0x48, 0x57, 0x4c, - 0x4d, 0x60, 0x4e, 0x2d, 0x46, 0x4d, 0x58, 0x53, 0x5c, 0x56, 0x5e, 0x41, - 0x3e, 0x66, 0x53, 0x5b, 0x49, 0x59, 0x5a, 0x55, 0x4e, 0x59, 0x46, 0x4a, - 0x44, 
0x42, 0x45, 0x3d, 0x4d, 0x45, 0x44, 0x4f, 0x4d, 0x53, 0x42, 0x5a, - 0x43, 0x3c, 0x48, 0x4f, 0x44, 0x59, 0x3f, 0x33, 0x45, 0x48, 0x43, 0x45, - 0x4d, 0x56, 0x48, 0x44, 0x3e, 0x48, 0x46, 0x4d, 0x44, 0x53, 0x46, 0x4e, - 0x45, 0x52, 0x40, 0x46, 0x4c, 0x50, 0x4e, 0x4b, 0x4d, 0x46, 0x48, 0x46, - 0x50, 0x52, 0x4e, 0x57, 0x3f, 0x4a, 0x49, 0x50, 0x53, 0x4e, 0x41, 0x66, - 0x49, 0x4f, 0x40, 0x4b, 0x50, 0x4c, 0x4a, 0x70, 0x42, 0x51, 0x41, 0x4c, - 0x50, 0x4f, 0x46, 0x60, 0x45, 0x47, 0x54, 0x4c, 0x49, 0x59, 0x52, 0x61, - 0x4a, 0x53, 0x52, 0x4f, 0x4b, 0x4c, 0x46, 0x56, 0x4b, 0x54, 0x4f, 0x47, - 0x53, 0x49, 0x4f, 0x50, 0x4a, 0x54, 0x45, 0x4e, 0x47, 0x48, 0x47, 0x42, - 0x49, 0x44, 0x46, 0x46, 0x55, 0x4c, 0x4f, 0x36, 0x4c, 0x49, 0x3f, 0x4e, - 0x45, 0x4b, 0x4b, 0x36, 0x48, 0x4f, 0x4b, 0x50, 0x45, 0x47, 0x49, 0x3f, - 0x50, 0x4b, 0x52, 0x48, 0x4c, 0x41, 0x49, 0x43, 0x4e, 0x3c, 0x43, 0x45, - 0x3e, 0x45, 0x48, 0x44, 0x4d, 0x48, 0x56, 0x47, 0x4b, 0x54, 0x52, 0x2b, - 0x4d, 0x4e, 0x57, 0x4f, 0x57, 0x4f, 0x56, 0x43, 0x48, 0x5f, 0x4c, 0x51, - 0x4d, 0x58, 0x4f, 0x4e, 0x50, 0x50, 0x48, 0x4a, 0x4d, 0x3f, 0x47, 0x40, - 0x4b, 0x4a, 0x4e, 0x4b, 0x4a, 0x58, 0x42, 0x49, 0x3f, 0x42, 0x3d, 0x4d, - 0x46, 0x53, 0x45, 0x3e, 0x4e, 0x49, 0x4f, 0x4a, 0x47, 0x46, 0x40, 0x3e, - 0x4c, 0x4d, 0x4d, 0x45, 0x4a, 0x56, 0x40, 0x4a, 0x47, 0x57, 0x4f, 0x48, - 0x4f, 0x48, 0x47, 0x49, 0x4e, 0x52, 0x50, 0x48, 0x42, 0x52, 0x43, 0x5a, - 0x49, 0x42, 0x4f, 0x4f, 0x51, 0x51, 0x50, 0x5c, 0x4b, 0x43, 0x4b, 0x48, - 0x50, 0x51, 0x4b, 0x6d, 0x53, 0x4e, 0x44, 0x4c, 0x4c, 0x51, 0x46, 0x5b, - 0x44, 0x48, 0x4d, 0x4c, 0x46, 0x4f, 0x54, 0x54, 0x4e, 0x54, 0x42, 0x4e, - 0x4c, 0x49, 0x49, 0x58, 0x49, 0x53, 0x53, 0x4a, 0x4e, 0x4b, 0x47, 0x53, - 0x43, 0x55, 0x46, 0x51, 0x3d, 0x3d, 0x4c, 0x47, 0x4e, 0x51, 0x47, 0x48, - 0x4b, 0x4c, 0x42, 0x3b, 0x43, 0x4f, 0x44, 0x4d, 0x54, 0x4b, 0x4a, 0x47, - 0x4c, 0x42, 0x4b, 0x43, 0x41, 0x4e, 0x4d, 0x50, 0x45, 0x46, 0x41, 0x4a, - 0x49, 0x49, 0x54, 0x47, 0x4c, 0x4b, 0x50, 0x4e, 0x3f, 0x43, 0x40, 0x41, - 0x44, 
0x54, 0x51, 0x47, 0x4c, 0x4b, 0x4f, 0x34, 0x4d, 0x4c, 0x4f, 0x49, - 0x56, 0x4e, 0x4b, 0x3e, 0x48, 0x53, 0x4e, 0x56, 0x49, 0x4e, 0x4c, 0x40, - 0x55, 0x4a, 0x46, 0x4f, 0x48, 0x4a, 0x55, 0x41, 0x55, 0x3d, 0x47, 0x51, - 0x50, 0x51, 0x45, 0x51, 0x4b, 0x4e, 0x4a, 0x4f, 0x4b, 0x45, 0x42, 0x3c, - 0x4e, 0x46, 0x47, 0x49, 0x4a, 0x4c, 0x48, 0x41, 0x4f, 0x4a, 0x44, 0x45, - 0x4e, 0x4e, 0x43, 0x41, 0x4c, 0x47, 0x48, 0x49, 0x4c, 0x48, 0x4f, 0x4a, - 0x4f, 0x4a, 0x4b, 0x45, 0x42, 0x40, 0x52, 0x55, 0x4f, 0x49, 0x44, 0x54, - 0x49, 0x48, 0x51, 0x4d, 0x44, 0x4a, 0x4d, 0x49, 0x4e, 0x4e, 0x51, 0x5d, - 0x42, 0x4d, 0x49, 0x3f, 0x48, 0x58, 0x40, 0x5e, 0x48, 0x4f, 0x49, 0x53, - 0x45, 0x47, 0x4f, 0x53, 0x4d, 0x4f, 0x4d, 0x4d, 0x46, 0x55, 0x43, 0x51, - 0x4f, 0x51, 0x4a, 0x4e, 0x49, 0x42, 0x49, 0x50, 0x47, 0x4d, 0x42, 0x47, - 0x46, 0x50, 0x55, 0x47, 0x4d, 0x47, 0x3e, 0x51, 0x4d, 0x43, 0x44, 0x39, - 0x4e, 0x4b, 0x41, 0x48, 0x52, 0x53, 0x4d, 0x39, 0x4d, 0x51, 0x4c, 0x46, - 0x4e, 0x47, 0x49, 0x41, 0x45, 0x4a, 0x4a, 0x45, 0x50, 0x4a, 0x40, 0x48, - 0x43, 0x47, 0x44, 0x50, 0x4d, 0x47, 0x4a, 0x47, 0x45, 0x57, 0x41, 0x34, - 0x51, 0x40, 0x45, 0x44, 0x3c, 0x47, 0x46, 0x47, 0x44, 0x48, 0x42, 0x40, - 0x37, 0x53, 0x4a, 0x43, 0x49, 0x4b, 0x43, 0x44, 0x4f, 0x4f, 0x48, 0x48, - 0x53, 0x49, 0x4b, 0x48, 0x4e, 0x4c, 0x42, 0x45, 0x4c, 0x4a, 0x4a, 0x46, - 0x47, 0x57, 0x3e, 0x46, 0x46, 0x45, 0x4a, 0x43, 0x46, 0x49, 0x43, 0x52, - 0x3e, 0x48, 0x4a, 0x4b, 0x47, 0x47, 0x48, 0x4a, 0x4b, 0x4b, 0x4e, 0x44, - 0x42, 0x44, 0x50, 0x41, 0x49, 0x49, 0x4d, 0x4b, 0x44, 0x46, 0x4a, 0x52, - 0x4d, 0x47, 0x49, 0x4b, 0x4d, 0x49, 0x41, 0x48, 0x4b, 0x3f, 0x45, 0x4f, - 0x51, 0x41, 0x55, 0x42, 0x49, 0x4b, 0x4b, 0x51, 0x4f, 0x4f, 0x42, 0x4e, - 0x4e, 0x4a, 0x52, 0x41, 0x4f, 0x42, 0x48, 0x3d, 0x4a, 0x44, 0x50, 0x4b, - 0x49, 0x45, 0x51, 0x46, 0x51, 0x44, 0x4d, 0x47, 0x4a, 0x4a, 0x4d, 0x49, - 0x4d, 0x48, 0x4d, 0x4f, 0x4d, 0x44, 0x48, 0x4e, 0x4a, 0x4b, 0x40, 0x4f, - 0x47, 0x3a, 0x41, 0x47, 0x4a, 0x4a, 0x4a, 0x48, 0x42, 0x41, 0x4d, 0x56, - 0x3f, 
0x52, 0x4d, 0x4c, 0x44, 0x48, 0x47, 0x4e, 0x51, 0x4c, 0x49, 0x47, - 0x44, 0x4c, 0x4b, 0x47, 0x48, 0x46, 0x47, 0x4f, 0x43, 0x41, 0x3e, 0x47, - 0x53, 0x4a, 0x46, 0x42, 0x46, 0x61, 0x43, 0x30, 0x4e, 0x52, 0x43, 0x45, - 0x32, 0x4a, 0x45, 0x48, 0x51, 0x3e, 0x44, 0x3b, 0x3a, 0x63, 0x4c, 0x46, - 0x4c, 0x49, 0x3d, 0x41, 0x52, 0x53, 0x43, 0x43, 0x45, 0x3d, 0x48, 0x40, - 0x4b, 0x4a, 0x49, 0x48, 0x4d, 0x49, 0x4b, 0x4c, 0x3f, 0x4e, 0x4b, 0x47, - 0x45, 0x4d, 0x3f, 0x4d, 0x43, 0x50, 0x48, 0x4b, 0x54, 0x3e, 0x44, 0x4e, - 0x3e, 0x4c, 0x43, 0x4b, 0x4c, 0x4b, 0x3e, 0x49, 0x50, 0x52, 0x4a, 0x4a, - 0x50, 0x50, 0x43, 0x4e, 0x49, 0x48, 0x51, 0x50, 0x47, 0x3d, 0x45, 0x4b, - 0x47, 0x46, 0x4d, 0x4c, 0x45, 0x4d, 0x4a, 0x4d, 0x42, 0x4d, 0x47, 0x4f, - 0x40, 0x43, 0x46, 0x51, 0x47, 0x4b, 0x43, 0x49, 0x49, 0x50, 0x4b, 0x4b, - 0x46, 0x4a, 0x4c, 0x48, 0x49, 0x47, 0x4b, 0x56, 0x55, 0x4f, 0x49, 0x4f, - 0x4f, 0x4e, 0x4b, 0x49, 0x4a, 0x4a, 0x49, 0x47, 0x44, 0x4b, 0x47, 0x50, - 0x46, 0x4c, 0x46, 0x4c, 0x4b, 0x4e, 0x49, 0x57, 0x4d, 0x3e, 0x46, 0x47, - 0x50, 0x45, 0x4f, 0x52, 0x3e, 0x4d, 0x49, 0x4a, 0x40, 0x49, 0x4f, 0x5c, - 0x3e, 0x4a, 0x47, 0x45, 0x47, 0x41, 0x44, 0x3f, 0x4b, 0x4a, 0x52, 0x43, - 0x41, 0x43, 0x43, 0x47, 0x55, 0x49, 0x42, 0x4c, 0x58, 0x4b, 0x42, 0x48, - 0x4b, 0x5a, 0x36, 0x33, 0x53, 0x57, 0x4d, 0x4a, 0x37, 0x4c, 0x3e, 0x48, - 0x43, 0x46, 0x39, 0x3c, 0x34, 0x65, 0x47, 0x3d, 0x47, 0x42, 0x3c, 0x3e, - 0x45, 0x5b, 0x44, 0x3e, 0x45, 0x43, 0x46, 0x43, 0x59, 0x4e, 0x48, 0x46, - 0x43, 0x3f, 0x46, 0x47, 0x4e, 0x53, 0x50, 0x4b, 0x4a, 0x3f, 0x4a, 0x54, - 0x4c, 0x4a, 0x43, 0x50, 0x4c, 0x42, 0x4d, 0x55, 0x4d, 0x51, 0x51, 0x46, - 0x49, 0x41, 0x50, 0x44, 0x4a, 0x4b, 0x4b, 0x43, 0x4b, 0x4e, 0x47, 0x4b, - 0x3e, 0x4e, 0x44, 0x4d, 0x49, 0x41, 0x49, 0x44, 0x50, 0x4d, 0x45, 0x4e, - 0x4b, 0x50, 0x45, 0x4c, 0x46, 0x4a, 0x46, 0x42, 0x50, 0x45, 0x48, 0x53, - 0x4d, 0x44, 0x42, 0x50, 0x4c, 0x49, 0x45, 0x55, 0x4d, 0x42, 0x43, 0x41, - 0x4c, 0x41, 0x4e, 0x4d, 0x42, 0x4e, 0x3f, 0x44, 0x4d, 0x4c, 0x4b, 0x4a, - 0x47, 
0x47, 0x4e, 0x54, 0x43, 0x40, 0x41, 0x55, 0x49, 0x49, 0x4e, 0x49, - 0x52, 0x4e, 0x46, 0x58, 0x4b, 0x3d, 0x4a, 0x44, 0x4e, 0x47, 0x53, 0x58, - 0x47, 0x42, 0x52, 0x46, 0x49, 0x4b, 0x47, 0x5a, 0x4c, 0x46, 0x46, 0x49, - 0x4b, 0x4d, 0x3d, 0x48, 0x40, 0x54, 0x48, 0x4c, 0x4c, 0x44, 0x4c, 0x46, - 0x47, 0x4b, 0x4d, 0x44, 0x5a, 0x4a, 0x3e, 0x46, 0x48, 0x53, 0x39, 0x30, - 0x51, 0x60, 0x4d, 0x47, 0x35, 0x4f, 0x45, 0x45, 0x4a, 0x4b, 0x42, 0x3f, - 0x38, 0x6c, 0x3d, 0x40, 0x44, 0x48, 0x3a, 0x3b, 0x46, 0x5e, 0x45, 0x3b, - 0x47, 0x47, 0x45, 0x42, 0x53, 0x55, 0x44, 0x45, 0x46, 0x43, 0x48, 0x48, - 0x52, 0x5d, 0x3e, 0x41, 0x53, 0x42, 0x48, 0x55, 0x49, 0x4d, 0x4a, 0x46, - 0x52, 0x46, 0x51, 0x48, 0x44, 0x46, 0x48, 0x41, 0x49, 0x49, 0x49, 0x49, - 0x41, 0x4d, 0x40, 0x4f, 0x45, 0x46, 0x45, 0x3f, 0x53, 0x40, 0x46, 0x43, - 0x47, 0x4d, 0x50, 0x4c, 0x55, 0x48, 0x45, 0x47, 0x4f, 0x46, 0x42, 0x4d, - 0x41, 0x48, 0x46, 0x4e, 0x42, 0x48, 0x48, 0x45, 0x41, 0x45, 0x48, 0x4a, - 0x40, 0x49, 0x43, 0x4b, 0x48, 0x4a, 0x4c, 0x45, 0x4b, 0x48, 0x48, 0x4f, - 0x40, 0x4b, 0x4a, 0x44, 0x50, 0x4a, 0x43, 0x50, 0x4c, 0x44, 0x46, 0x4c, - 0x42, 0x44, 0x4e, 0x55, 0x47, 0x49, 0x48, 0x47, 0x52, 0x4e, 0x44, 0x59, - 0x4e, 0x44, 0x4a, 0x48, 0x49, 0x4a, 0x42, 0x4e, 0x3e, 0x39, 0x51, 0x45, - 0x4d, 0x49, 0x4f, 0x54, 0x51, 0x4b, 0x50, 0x44, 0x53, 0x4f, 0x4d, 0x48, - 0x42, 0x45, 0x4e, 0x40, 0x4a, 0x48, 0x43, 0x48, 0x52, 0x54, 0x4d, 0x49, - 0x5f, 0x53, 0x46, 0x4e, 0x3f, 0x5a, 0x36, 0x31, 0x52, 0x60, 0x4b, 0x4a, - 0x32, 0x51, 0x40, 0x44, 0x46, 0x52, 0x44, 0x41, 0x3a, 0x6e, 0x41, 0x3e, - 0x47, 0x3e, 0x3a, 0x2a, 0x44, 0x5a, 0x40, 0x3c, 0x4d, 0x48, 0x46, 0x3b, - 0x5e, 0x58, 0x4d, 0x47, 0x51, 0x3a, 0x4b, 0x48, 0x5b, 0x5a, 0x54, 0x43, - 0x50, 0x4c, 0x54, 0x54, 0x49, 0x47, 0x4f, 0x48, 0x50, 0x40, 0x4f, 0x4a, - 0x42, 0x42, 0x3c, 0x41, 0x43, 0x4e, 0x53, 0x49, 0x4b, 0x4d, 0x49, 0x41, - 0x4c, 0x3e, 0x40, 0x49, 0x40, 0x44, 0x49, 0x4f, 0x50, 0x4a, 0x42, 0x3a, - 0x49, 0x4b, 0x47, 0x50, 0x49, 0x41, 0x52, 0x46, 0x3d, 0x44, 0x46, 0x43, - 0x4b, 
0x4b, 0x4d, 0x4b, 0x4e, 0x40, 0x45, 0x43, 0x48, 0x44, 0x55, 0x51, - 0x4a, 0x46, 0x4e, 0x40, 0x53, 0x4a, 0x45, 0x41, 0x48, 0x48, 0x45, 0x4e, - 0x4a, 0x48, 0x40, 0x4c, 0x54, 0x44, 0x42, 0x4d, 0x49, 0x43, 0x45, 0x4c, - 0x43, 0x4f, 0x46, 0x3f, 0x46, 0x4f, 0x4b, 0x59, 0x46, 0x49, 0x54, 0x47, - 0x49, 0x46, 0x45, 0x53, 0x4a, 0x49, 0x54, 0x45, 0x41, 0x45, 0x4c, 0x5e, - 0x50, 0x3d, 0x4d, 0x49, 0x55, 0x4b, 0x49, 0x47, 0x4c, 0x4f, 0x43, 0x3d, - 0x41, 0x4b, 0x43, 0x46, 0x4f, 0x4a, 0x4c, 0x54, 0x5e, 0x4e, 0x40, 0x4d, - 0x3d, 0x59, 0x40, 0x28, 0x54, 0x5f, 0x4d, 0x4b, 0x36, 0x51, 0x3a, 0x47, - 0x4a, 0x55, 0x42, 0x43, 0x3b, 0x72, 0x3b, 0x3d, 0x51, 0x42, 0x3f, 0x2d, - 0x4b, 0x5a, 0x48, 0x44, 0x49, 0x49, 0x3d, 0x39, 0x56, 0x55, 0x46, 0x46, - 0x4b, 0x43, 0x40, 0x4a, 0x52, 0x56, 0x4d, 0x45, 0x4b, 0x48, 0x40, 0x5a, - 0x4e, 0x3a, 0x53, 0x48, 0x4c, 0x44, 0x49, 0x4e, 0x42, 0x47, 0x46, 0x40, - 0x51, 0x42, 0x50, 0x4b, 0x43, 0x53, 0x44, 0x44, 0x46, 0x4c, 0x4c, 0x3c, - 0x42, 0x45, 0x42, 0x45, 0x44, 0x4b, 0x52, 0x3d, 0x47, 0x4b, 0x4c, 0x4e, - 0x52, 0x4a, 0x4e, 0x41, 0x3f, 0x46, 0x43, 0x54, 0x44, 0x53, 0x4e, 0x48, - 0x40, 0x41, 0x4f, 0x45, 0x43, 0x3c, 0x52, 0x49, 0x40, 0x44, 0x4a, 0x3f, - 0x4d, 0x4c, 0x4f, 0x47, 0x44, 0x47, 0x55, 0x47, 0x50, 0x4d, 0x4a, 0x4c, - 0x50, 0x48, 0x47, 0x55, 0x4b, 0x4a, 0x52, 0x49, 0x3d, 0x3f, 0x4f, 0x51, - 0x48, 0x4e, 0x42, 0x4e, 0x42, 0x48, 0x4e, 0x49, 0x4a, 0x50, 0x45, 0x54, - 0x41, 0x43, 0x45, 0x4d, 0x48, 0x48, 0x48, 0x51, 0x53, 0x3e, 0x55, 0x44, - 0x52, 0x56, 0x44, 0x4d, 0x4e, 0x48, 0x4b, 0x43, 0x48, 0x53, 0x48, 0x44, - 0x49, 0x45, 0x4e, 0x50, 0x5d, 0x4a, 0x45, 0x4c, 0x45, 0x55, 0x43, 0x2e, - 0x59, 0x60, 0x4e, 0x4d, 0x32, 0x53, 0x3e, 0x3f, 0x40, 0x63, 0x41, 0x48, - 0x38, 0x73, 0x38, 0x46, 0x50, 0x3e, 0x3c, 0x23, 0x48, 0x61, 0x45, 0x3c, - 0x41, 0x41, 0x36, 0x3b, 0x58, 0x56, 0x4a, 0x40, 0x4f, 0x44, 0x45, 0x4c, - 0x5a, 0x56, 0x47, 0x3f, 0x4d, 0x4b, 0x46, 0x5d, 0x52, 0x47, 0x45, 0x4c, - 0x4a, 0x52, 0x4f, 0x4f, 0x4f, 0x43, 0x4f, 0x47, 0x43, 0x46, 0x3c, 0x4c, - 0x46, 
0x55, 0x40, 0x53, 0x43, 0x3e, 0x42, 0x35, 0x51, 0x41, 0x42, 0x3f, - 0x45, 0x3d, 0x41, 0x31, 0x4e, 0x47, 0x48, 0x42, 0x41, 0x45, 0x43, 0x38, - 0x42, 0x40, 0x4a, 0x47, 0x4e, 0x43, 0x40, 0x43, 0x48, 0x49, 0x45, 0x4f, - 0x44, 0x42, 0x4d, 0x42, 0x42, 0x3f, 0x46, 0x52, 0x3c, 0x3c, 0x47, 0x43, - 0x46, 0x47, 0x45, 0x40, 0x4c, 0x44, 0x43, 0x4a, 0x4b, 0x4d, 0x4e, 0x46, - 0x51, 0x45, 0x47, 0x4b, 0x45, 0x50, 0x40, 0x42, 0x4c, 0x4c, 0x4c, 0x4f, - 0x44, 0x3c, 0x49, 0x3c, 0x3f, 0x45, 0x3f, 0x5c, 0x42, 0x3e, 0x4b, 0x4e, - 0x50, 0x45, 0x42, 0x5c, 0x4c, 0x48, 0x50, 0x52, 0x50, 0x47, 0x4b, 0x44, - 0x3d, 0x50, 0x55, 0x4c, 0x48, 0x3f, 0x4b, 0x44, 0x4a, 0x51, 0x42, 0x4c, - 0x60, 0x51, 0x41, 0x4b, 0x46, 0x5c, 0x42, 0x2c, 0x55, 0x61, 0x50, 0x52, - 0x37, 0x5a, 0x3f, 0x43, 0x43, 0x58, 0x3a, 0x4d, 0x3e, 0x72, 0x35, 0x3f, - 0x58, 0x41, 0x40, 0x1f, 0x55, 0x63, 0x3f, 0x49, 0x41, 0x3e, 0x35, 0x41, - 0x65, 0x54, 0x42, 0x45, 0x45, 0x3c, 0x44, 0x45, 0x59, 0x5a, 0x4d, 0x41, - 0x51, 0x46, 0x49, 0x59, 0x4c, 0x41, 0x42, 0x44, 0x4a, 0x45, 0x3f, 0x4a, - 0x4a, 0x44, 0x48, 0x48, 0x52, 0x40, 0x4a, 0x4a, 0x4d, 0x54, 0x44, 0x48, - 0x54, 0x46, 0x49, 0x3b, 0x42, 0x4a, 0x4e, 0x46, 0x4a, 0x45, 0x4f, 0x30, - 0x46, 0x41, 0x47, 0x46, 0x4b, 0x47, 0x46, 0x38, 0x4c, 0x3a, 0x4b, 0x46, - 0x52, 0x48, 0x4f, 0x3e, 0x48, 0x4a, 0x48, 0x4b, 0x44, 0x45, 0x4a, 0x46, - 0x3f, 0x4f, 0x40, 0x44, 0x43, 0x43, 0x4b, 0x39, 0x46, 0x43, 0x49, 0x49, - 0x49, 0x4a, 0x44, 0x48, 0x4c, 0x41, 0x4d, 0x52, 0x4c, 0x4a, 0x46, 0x3d, - 0x41, 0x4b, 0x41, 0x48, 0x45, 0x3b, 0x51, 0x54, 0x4a, 0x39, 0x4d, 0x41, - 0x54, 0x46, 0x4c, 0x53, 0x48, 0x3e, 0x4a, 0x3d, 0x41, 0x52, 0x54, 0x63, - 0x44, 0x4d, 0x4a, 0x43, 0x52, 0x4b, 0x52, 0x52, 0x4e, 0x41, 0x48, 0x42, - 0x48, 0x4d, 0x49, 0x45, 0x51, 0x48, 0x3e, 0x47, 0x5a, 0x52, 0x4a, 0x4e, - 0x3e, 0x59, 0x3c, 0x2e, 0x5c, 0x5b, 0x4c, 0x56, 0x30, 0x59, 0x3a, 0x48, - 0x3d, 0x5c, 0x44, 0x49, 0x40, 0x7c, 0x3a, 0x48, 0x54, 0x40, 0x41, 0x28, - 0x4d, 0x64, 0x46, 0x47, 0x49, 0x40, 0x30, 0x3a, 0x5f, 0x5b, 0x42, 0x37, - 0x49, 
0x45, 0x40, 0x43, 0x5b, 0x54, 0x48, 0x4d, 0x4a, 0x47, 0x51, 0x58, - 0x4b, 0x3c, 0x4d, 0x46, 0x4b, 0x52, 0x4c, 0x58, 0x53, 0x46, 0x42, 0x45, - 0x4c, 0x4a, 0x4d, 0x4e, 0x52, 0x4d, 0x46, 0x44, 0x46, 0x3f, 0x46, 0x34, - 0x4f, 0x42, 0x44, 0x46, 0x44, 0x50, 0x47, 0x30, 0x44, 0x3c, 0x42, 0x46, - 0x4f, 0x4a, 0x52, 0x30, 0x55, 0x4f, 0x45, 0x4a, 0x48, 0x4c, 0x4e, 0x35, - 0x4e, 0x3c, 0x45, 0x4a, 0x45, 0x4a, 0x44, 0x3c, 0x4e, 0x4a, 0x51, 0x44, - 0x49, 0x40, 0x4a, 0x40, 0x41, 0x44, 0x4f, 0x4c, 0x43, 0x45, 0x4b, 0x43, - 0x3e, 0x3e, 0x4c, 0x44, 0x48, 0x48, 0x42, 0x42, 0x4d, 0x43, 0x50, 0x4d, - 0x49, 0x3c, 0x45, 0x4f, 0x4c, 0x46, 0x4b, 0x48, 0x4d, 0x4d, 0x49, 0x55, - 0x49, 0x3b, 0x40, 0x44, 0x4a, 0x4b, 0x4e, 0x5e, 0x43, 0x47, 0x45, 0x43, - 0x4d, 0x4d, 0x49, 0x46, 0x4a, 0x44, 0x4e, 0x3e, 0x52, 0x41, 0x47, 0x47, - 0x4a, 0x50, 0x48, 0x43, 0x5d, 0x4f, 0x49, 0x48, 0x43, 0x4f, 0x45, 0x3e, - 0x5a, 0x69, 0x4d, 0x5a, 0x3a, 0x5d, 0x3a, 0x48, 0x42, 0x55, 0x3e, 0x48, - 0x48, 0x7b, 0x37, 0x40, 0x57, 0x45, 0x48, 0x24, 0x50, 0x61, 0x4c, 0x4a, - 0x44, 0x41, 0x34, 0x38, 0x65, 0x5b, 0x4f, 0x3c, 0x4d, 0x3a, 0x4a, 0x4c, - 0x66, 0x55, 0x50, 0x47, 0x4d, 0x46, 0x47, 0x58, 0x4c, 0x48, 0x48, 0x48, - 0x4e, 0x59, 0x4f, 0x4b, 0x45, 0x45, 0x4b, 0x54, 0x46, 0x51, 0x4f, 0x44, - 0x42, 0x55, 0x48, 0x44, 0x48, 0x41, 0x53, 0x2e, 0x4d, 0x45, 0x44, 0x54, - 0x4a, 0x44, 0x53, 0x34, 0x4c, 0x46, 0x47, 0x3f, 0x4c, 0x4b, 0x47, 0x36, - 0x47, 0x41, 0x43, 0x40, 0x51, 0x46, 0x45, 0x33, 0x46, 0x3e, 0x47, 0x50, - 0x3f, 0x48, 0x48, 0x37, 0x41, 0x41, 0x42, 0x3e, 0x45, 0x3d, 0x49, 0x3e, - 0x4f, 0x42, 0x49, 0x4a, 0x46, 0x46, 0x48, 0x44, 0x49, 0x45, 0x46, 0x4a, - 0x4a, 0x47, 0x48, 0x43, 0x44, 0x45, 0x3f, 0x4c, 0x4c, 0x49, 0x4d, 0x51, - 0x4a, 0x4a, 0x49, 0x4c, 0x42, 0x4d, 0x4b, 0x4b, 0x4a, 0x42, 0x47, 0x4d, - 0x3e, 0x4b, 0x47, 0x5c, 0x49, 0x3d, 0x4e, 0x41, 0x44, 0x49, 0x3e, 0x3e, - 0x4b, 0x47, 0x4e, 0x45, 0x44, 0x4a, 0x4d, 0x4a, 0x4f, 0x46, 0x45, 0x52, - 0x60, 0x53, 0x49, 0x50, 0x3d, 0x4f, 0x43, 0x3d, 0x52, 0x64, 0x52, 0x58, - 0x39, 
0x5f, 0x36, 0x4c, 0x45, 0x57, 0x42, 0x4b, 0x3f, 0x80, 0x34, 0x47, - 0x58, 0x41, 0x45, 0x1b, 0x4b, 0x5e, 0x4c, 0x40, 0x44, 0x42, 0x39, 0x3a, - 0x5e, 0x5b, 0x4b, 0x3a, 0x4b, 0x3f, 0x45, 0x3e, 0x69, 0x57, 0x4b, 0x45, - 0x4b, 0x3f, 0x45, 0x55, 0x49, 0x49, 0x48, 0x47, 0x41, 0x4f, 0x42, 0x53, - 0x49, 0x40, 0x42, 0x3e, 0x49, 0x47, 0x53, 0x47, 0x45, 0x51, 0x4a, 0x44, - 0x44, 0x45, 0x4e, 0x2a, 0x45, 0x42, 0x4a, 0x4b, 0x46, 0x4d, 0x41, 0x30, - 0x3d, 0x43, 0x3f, 0x48, 0x49, 0x44, 0x4d, 0x2e, 0x48, 0x4a, 0x4c, 0x51, - 0x50, 0x46, 0x3e, 0x2c, 0x4d, 0x3f, 0x47, 0x46, 0x3c, 0x40, 0x4c, 0x38, - 0x4f, 0x46, 0x47, 0x53, 0x3b, 0x3c, 0x4e, 0x3e, 0x49, 0x40, 0x43, 0x4c, - 0x4d, 0x48, 0x45, 0x3c, 0x4d, 0x4c, 0x4d, 0x45, 0x3f, 0x49, 0x4a, 0x43, - 0x4d, 0x41, 0x4b, 0x50, 0x4e, 0x46, 0x50, 0x44, 0x49, 0x44, 0x4e, 0x42, - 0x4a, 0x43, 0x4c, 0x4c, 0x49, 0x49, 0x44, 0x4e, 0x4b, 0x3f, 0x4b, 0x5d, - 0x41, 0x49, 0x4b, 0x46, 0x4e, 0x48, 0x45, 0x51, 0x4d, 0x45, 0x46, 0x45, - 0x4b, 0x4e, 0x3c, 0x4d, 0x3d, 0x41, 0x47, 0x47, 0x64, 0x54, 0x41, 0x55, - 0x47, 0x56, 0x44, 0x3b, 0x53, 0x66, 0x4f, 0x5e, 0x40, 0x5d, 0x38, 0x4a, - 0x41, 0x59, 0x42, 0x48, 0x47, 0xff, 0x36, 0x49, 0x59, 0x41, 0x43, 0x1d, - 0x4d, 0x5e, 0x44, 0x44, 0x50, 0x3f, 0x39, 0x40, 0x68, 0x5e, 0x4a, 0x41, - 0x52, 0x41, 0x43, 0x41, 0x68, 0x51, 0x45, 0x48, 0x4c, 0x46, 0x4a, 0x5e, - 0x4e, 0x40, 0x4d, 0x41, 0x41, 0x5c, 0x3f, 0x4e, 0x4c, 0x37, 0x48, 0x40, - 0x46, 0x47, 0x4f, 0x43, 0x53, 0x52, 0x3d, 0x44, 0x47, 0x44, 0x3d, 0x34, - 0x44, 0x42, 0x4a, 0x43, 0x4d, 0x3f, 0x53, 0x2e, 0x42, 0x47, 0x43, 0x4d, - 0x45, 0x45, 0x47, 0x31, 0x4d, 0x39, 0x41, 0x4a, 0x4a, 0x4d, 0x4b, 0x35, - 0x47, 0x4e, 0x4c, 0x40, 0x4a, 0x44, 0x44, 0x36, 0x3e, 0x49, 0x3f, 0x45, - 0x46, 0x43, 0x4e, 0x3c, 0x4d, 0x47, 0x4c, 0x48, 0x4a, 0x4b, 0x48, 0x39, - 0x46, 0x50, 0x4a, 0x4f, 0x46, 0x41, 0x44, 0x4a, 0x41, 0x4f, 0x4c, 0x4e, - 0x55, 0x46, 0x43, 0x46, 0x4a, 0x48, 0x4e, 0x46, 0x42, 0x40, 0x4f, 0x56, - 0x4c, 0x45, 0x4b, 0x46, 0x4a, 0x47, 0x42, 0x5e, 0x49, 0x4e, 0x46, 0x43, - 0x4e, 
0x42, 0x45, 0x48, 0x47, 0x48, 0x4f, 0x45, 0x47, 0x51, 0x4b, 0x4c, - 0x51, 0x39, 0x4d, 0x48, 0x60, 0x57, 0x49, 0x52, 0x3d, 0x57, 0x46, 0x3d, - 0x53, 0x68, 0x4b, 0x60, 0x40, 0x5a, 0x41, 0x4b, 0x46, 0x56, 0x46, 0x4c, - 0x49, 0x7e, 0x2f, 0x48, 0x51, 0x42, 0x40, 0x20, 0x4b, 0x62, 0x4d, 0x41, - 0x4f, 0x43, 0x3d, 0x35, 0x63, 0x63, 0x46, 0x3e, 0x4e, 0x47, 0x40, 0x40, - 0x60, 0x52, 0x4c, 0x46, 0x49, 0x48, 0x4f, 0x56, 0x51, 0x47, 0x52, 0x4e, - 0x4b, 0x59, 0x55, 0x4f, 0x48, 0x3d, 0x48, 0x4a, 0x4d, 0x50, 0x47, 0x47, - 0x51, 0x52, 0x4d, 0x51, 0x45, 0x45, 0x47, 0x2d, 0x4d, 0x41, 0x43, 0x49, - 0x4d, 0x40, 0x4a, 0x2f, 0x4f, 0x43, 0x46, 0x4a, 0x3e, 0x4a, 0x4a, 0x2b, - 0x49, 0x4c, 0x4c, 0x3e, 0x41, 0x4c, 0x4a, 0x2b, 0x40, 0x44, 0x46, 0x4a, - 0x40, 0x44, 0x42, 0x38, 0x52, 0x42, 0x46, 0x51, 0x53, 0x4e, 0x45, 0x31, - 0x45, 0x47, 0x4f, 0x46, 0x49, 0x43, 0x45, 0x3b, 0x4b, 0x4b, 0x4b, 0x4c, - 0x43, 0x4a, 0x4c, 0x43, 0x4e, 0x40, 0x52, 0x44, 0x48, 0x49, 0x47, 0x4b, - 0x4e, 0x3d, 0x4e, 0x44, 0x48, 0x4d, 0x4f, 0x4f, 0x50, 0x36, 0x47, 0x41, - 0x4a, 0x44, 0x45, 0x56, 0x4f, 0x4c, 0x50, 0x4b, 0x45, 0x3e, 0x45, 0x4e, - 0x45, 0x45, 0x43, 0x40, 0x47, 0x4e, 0x45, 0x3e, 0x4a, 0x3f, 0x49, 0x50, - 0x62, 0x55, 0x48, 0x56, 0x3e, 0x57, 0x4f, 0x3b, 0x55, 0x6c, 0x50, 0x5c, - 0x3d, 0x54, 0x3d, 0x46, 0x43, 0x59, 0x3e, 0x51, 0x4d, 0x7b, 0x33, 0x47, - 0x52, 0x43, 0x3f, 0x25, 0x4a, 0x6f, 0x49, 0x3e, 0x50, 0x40, 0x41, 0x30, - 0x5e, 0x5c, 0x4a, 0x43, 0x4d, 0x42, 0x46, 0x3b, 0x63, 0x53, 0x4f, 0x43, - 0x58, 0x48, 0x4b, 0x59, 0x50, 0x4e, 0x4b, 0x51, 0x4a, 0x55, 0x44, 0x46, - 0x4c, 0x3d, 0x4c, 0x52, 0x44, 0x52, 0x4c, 0x41, 0x4f, 0x44, 0x4a, 0x47, - 0x4e, 0x48, 0x49, 0x2e, 0x3e, 0x45, 0x4c, 0x48, 0x41, 0x47, 0x4d, 0x2e, - 0x40, 0x4b, 0x4c, 0x42, 0x4d, 0x40, 0x4e, 0x2e, 0x43, 0x45, 0x4b, 0x43, - 0x3e, 0x49, 0x55, 0x35, 0x43, 0x42, 0x42, 0x40, 0x4e, 0x46, 0x44, 0x37, - 0x49, 0x41, 0x3f, 0x52, 0x47, 0x4b, 0x43, 0x33, 0x4b, 0x47, 0x4b, 0x4c, - 0x4d, 0x4b, 0x3f, 0x42, 0x44, 0x40, 0x49, 0x41, 0x42, 0x49, 0x4b, 0x46, - 0x4e, 
0x4e, 0x47, 0x4e, 0x48, 0x48, 0x4b, 0x46, 0x51, 0x4b, 0x46, 0x4d, - 0x47, 0x4f, 0x3e, 0x51, 0x46, 0x4e, 0x46, 0x4b, 0x47, 0x48, 0x4e, 0x55, - 0x4c, 0x3d, 0x47, 0x51, 0x42, 0x45, 0x4f, 0x42, 0x52, 0x50, 0x44, 0x4c, - 0x44, 0x44, 0x43, 0x4d, 0x40, 0x42, 0x4d, 0x4b, 0x5d, 0x4e, 0x47, 0x54, - 0x47, 0x51, 0x43, 0x39, 0x58, 0x66, 0x4e, 0x5a, 0x41, 0x52, 0x36, 0x47, - 0x45, 0x5f, 0x34, 0x50, 0x46, 0x79, 0x30, 0x48, 0x50, 0x45, 0x32, 0x22, - 0x54, 0x64, 0x49, 0x46, 0x45, 0x3c, 0x42, 0x36, 0x65, 0x5c, 0x48, 0x3a, - 0x4d, 0x4b, 0x47, 0x3e, 0x63, 0x56, 0x4a, 0x48, 0x51, 0x42, 0x4f, 0x5e, - 0x4c, 0x44, 0x4b, 0x4c, 0x3d, 0x5a, 0x43, 0x4d, 0x42, 0x40, 0x4f, 0x4d, - 0x3f, 0x3e, 0x46, 0x40, 0x49, 0x42, 0x49, 0x40, 0x49, 0x4c, 0x4a, 0x2e, - 0x4b, 0x3f, 0x53, 0x4b, 0x48, 0x49, 0x3e, 0x34, 0x47, 0x4a, 0x4b, 0x46, - 0x3b, 0x49, 0x46, 0x34, 0x4b, 0x48, 0x4c, 0x49, 0x49, 0x43, 0x4f, 0x2e, - 0x44, 0x46, 0x48, 0x50, 0x46, 0x4e, 0x4a, 0x37, 0x4b, 0x4c, 0x4a, 0x50, - 0x45, 0x4a, 0x48, 0x3b, 0x48, 0x44, 0x48, 0x4a, 0x41, 0x44, 0x52, 0x3f, - 0x4c, 0x46, 0x4a, 0x45, 0x46, 0x49, 0x49, 0x36, 0x53, 0x3e, 0x48, 0x47, - 0x3f, 0x42, 0x41, 0x4c, 0x42, 0x4a, 0x52, 0x46, 0x49, 0x3f, 0x48, 0x5a, - 0x43, 0x42, 0x3d, 0x43, 0x4f, 0x44, 0x43, 0x65, 0x41, 0x41, 0x44, 0x4b, - 0x50, 0x44, 0x53, 0x49, 0x41, 0x45, 0x4a, 0x4d, 0x40, 0x45, 0x4a, 0x4e, - 0x50, 0x40, 0x51, 0x40, 0x5e, 0x50, 0x43, 0x5c, 0x47, 0x5a, 0x44, 0x4c, - 0x54, 0x64, 0x4f, 0x63, 0x39, 0x58, 0x3c, 0x4a, 0x42, 0x5e, 0x3c, 0x4a, - 0x48, 0x7b, 0x34, 0x4c, 0x4f, 0x44, 0x30, 0x24, 0x50, 0x65, 0x47, 0x39, - 0x46, 0x3e, 0x3f, 0x33, 0x65, 0x5a, 0x44, 0x38, 0x50, 0x47, 0x4b, 0x3e, - 0x5b, 0x53, 0x4a, 0x4d, 0x51, 0x40, 0x47, 0x59, 0x51, 0x42, 0x4f, 0x50, - 0x45, 0x57, 0x46, 0x50, 0x3f, 0x3c, 0x4c, 0x4f, 0x46, 0x41, 0x4a, 0x3e, - 0x4d, 0x45, 0x51, 0x48, 0x4e, 0x44, 0x4e, 0x35, 0x44, 0x3f, 0x44, 0x48, - 0x3c, 0x4c, 0x49, 0x2c, 0x4a, 0x46, 0x48, 0x44, 0x4b, 0x42, 0x4b, 0x2f, - 0x4e, 0x50, 0x4c, 0x4d, 0x44, 0x46, 0x3f, 0x39, 0x4d, 0x47, 0x45, 0x41, - 0x42, 
0x47, 0x4a, 0x3a, 0x40, 0x3e, 0x4a, 0x51, 0x3f, 0x47, 0x44, 0x37, - 0x47, 0x4e, 0x47, 0x52, 0x45, 0x42, 0x4a, 0x3d, 0x43, 0x4d, 0x4d, 0x47, - 0x48, 0x43, 0x44, 0x44, 0x47, 0x4e, 0x52, 0x4b, 0x4e, 0x50, 0x42, 0x47, - 0x4b, 0x4b, 0x4e, 0x4c, 0x4e, 0x47, 0x50, 0x56, 0x46, 0x47, 0x4d, 0x49, - 0x4d, 0x46, 0x49, 0x5f, 0x49, 0x42, 0x4d, 0x44, 0x40, 0x4b, 0x52, 0x45, - 0x46, 0x4a, 0x4b, 0x49, 0x47, 0x4b, 0x42, 0x45, 0x42, 0x44, 0x46, 0x4c, - 0x62, 0x4a, 0x44, 0x53, 0x43, 0x5a, 0x48, 0x49, 0x59, 0x68, 0x46, 0x61, - 0x40, 0x5a, 0x3a, 0x4d, 0x45, 0x5e, 0x33, 0x4f, 0x4e, 0x74, 0x3e, 0x3e, - 0x5a, 0x4b, 0x34, 0x31, 0x52, 0x6c, 0x44, 0x39, 0x4c, 0x3b, 0x39, 0x3a, - 0x63, 0x65, 0x4b, 0x40, 0x50, 0x4d, 0x53, 0x4a, 0x69, 0x56, 0x54, 0x45, - 0x4c, 0x4c, 0x50, 0x5b, 0x4d, 0x4f, 0x3d, 0x4b, 0x44, 0x47, 0x43, 0x47, - 0x49, 0x3c, 0x49, 0x41, 0x41, 0x3f, 0x47, 0x43, 0x48, 0x47, 0x4c, 0x43, - 0x4a, 0x40, 0x4d, 0x32, 0x4b, 0x4d, 0x44, 0x48, 0x46, 0x44, 0x50, 0x2f, - 0x4e, 0x49, 0x53, 0x4b, 0x52, 0x47, 0x4b, 0x2b, 0x48, 0x4b, 0x4a, 0x4c, - 0x4d, 0x4c, 0x43, 0x37, 0x48, 0x3c, 0x4b, 0x42, 0x51, 0x3f, 0x45, 0x3c, - 0x49, 0x40, 0x42, 0x43, 0x4d, 0x4c, 0x3f, 0x3f, 0x4d, 0x43, 0x45, 0x42, - 0x48, 0x42, 0x48, 0x39, 0x51, 0x4e, 0x46, 0x4f, 0x3e, 0x4c, 0x45, 0x3e, - 0x3f, 0x3f, 0x43, 0x41, 0x4b, 0x4b, 0x43, 0x4d, 0x44, 0x3b, 0x48, 0x45, - 0x3c, 0x4a, 0x48, 0x5b, 0x3c, 0x4b, 0x4c, 0x44, 0x46, 0x3e, 0x45, 0x57, - 0x43, 0x42, 0x51, 0x4a, 0x46, 0x47, 0x43, 0x49, 0x42, 0x43, 0x50, 0x4e, - 0x4e, 0x44, 0x41, 0x4e, 0x4e, 0x41, 0x48, 0x47, 0x5c, 0x53, 0x44, 0x54, - 0x44, 0x5b, 0x45, 0x46, 0x55, 0x67, 0x4d, 0x5d, 0x40, 0x5a, 0x43, 0x4b, - 0x43, 0x60, 0x3c, 0x4b, 0x41, 0x79, 0x41, 0x41, 0x58, 0x48, 0x40, 0x3b, - 0x4f, 0x6c, 0x46, 0x3f, 0x53, 0x3a, 0x3d, 0x36, 0x5a, 0x57, 0x44, 0x41, - 0x4c, 0x47, 0x4e, 0x48, 0x62, 0x60, 0x4a, 0x46, 0x51, 0x3e, 0x52, 0x5f, - 0x4b, 0x46, 0x48, 0x4c, 0x4c, 0x55, 0x43, 0x46, 0x49, 0x3e, 0x41, 0x40, - 0x4d, 0x47, 0x46, 0x3b, 0x51, 0x3a, 0x4a, 0x45, 0x50, 0x47, 0x51, 0x38, - 0x44, 
0x41, 0x40, 0x4b, 0x4d, 0x44, 0x4d, 0x28, 0x47, 0x3e, 0x44, 0x40, - 0x49, 0x49, 0x40, 0x3c, 0x44, 0x4c, 0x48, 0x51, 0x46, 0x3e, 0x47, 0x2a, - 0x41, 0x44, 0x49, 0x4c, 0x4e, 0x4e, 0x42, 0x3c, 0x49, 0x42, 0x43, 0x45, - 0x4e, 0x4d, 0x50, 0x39, 0x42, 0x43, 0x48, 0x41, 0x3f, 0x40, 0x4e, 0x3a, - 0x44, 0x3d, 0x49, 0x4d, 0x47, 0x45, 0x4b, 0x42, 0x4c, 0x4d, 0x3f, 0x3f, - 0x4e, 0x4d, 0x4d, 0x4d, 0x4d, 0x45, 0x47, 0x43, 0x4c, 0x46, 0x47, 0x57, - 0x4b, 0x42, 0x4d, 0x46, 0x4b, 0x4b, 0x43, 0x58, 0x48, 0x49, 0x4d, 0x47, - 0x43, 0x49, 0x4b, 0x48, 0x46, 0x4f, 0x4f, 0x42, 0x4a, 0x43, 0x49, 0x4e, - 0x4a, 0x47, 0x4c, 0x48, 0x5a, 0x57, 0x4a, 0x58, 0x49, 0x4f, 0x45, 0x47, - 0x63, 0x66, 0x4d, 0x5e, 0x4b, 0x51, 0x45, 0x4a, 0x43, 0x5d, 0x33, 0x4b, - 0x4e, 0x70, 0x42, 0x39, 0x57, 0x4a, 0x40, 0x3a, 0x51, 0x68, 0x45, 0x45, - 0x4c, 0x44, 0x3a, 0x3a, 0x4f, 0x62, 0x49, 0x45, 0x53, 0x4c, 0x4e, 0x41, - 0x63, 0x5e, 0x44, 0x44, 0x47, 0x43, 0x47, 0x59, 0x4c, 0x4b, 0x4c, 0x49, - 0x3e, 0x43, 0x4c, 0x46, 0x4c, 0x38, 0x47, 0x46, 0x46, 0x47, 0x40, 0x44, - 0x51, 0x3e, 0x40, 0x47, 0x3f, 0x45, 0x48, 0x2a, 0x42, 0x3e, 0x43, 0x46, - 0x50, 0x4c, 0x4a, 0x2c, 0x49, 0x4b, 0x48, 0x48, 0x40, 0x4a, 0x4a, 0x37, - 0x4e, 0x42, 0x4f, 0x4c, 0x41, 0x43, 0x45, 0x38, 0x4e, 0x3d, 0x41, 0x47, - 0x42, 0x42, 0x43, 0x3b, 0x4a, 0x40, 0x48, 0x4a, 0x53, 0x44, 0x4d, 0x35, - 0x51, 0x3c, 0x4e, 0x4e, 0x3e, 0x3f, 0x4b, 0x3c, 0x3e, 0x47, 0x41, 0x48, - 0x40, 0x46, 0x4e, 0x44, 0x49, 0x42, 0x49, 0x44, 0x4b, 0x46, 0x46, 0x43, - 0x4c, 0x4b, 0x49, 0x4d, 0x3d, 0x47, 0x43, 0x5c, 0x4a, 0x42, 0x47, 0x4e, - 0x47, 0x40, 0x4c, 0x55, 0x3f, 0x45, 0x46, 0x49, 0x46, 0x48, 0x49, 0x4d, - 0x4c, 0x41, 0x49, 0x40, 0x4a, 0x44, 0x42, 0x49, 0x52, 0x41, 0x49, 0x4a, - 0x5c, 0x53, 0x47, 0x58, 0x49, 0x55, 0x4a, 0x4a, 0x62, 0x61, 0x4b, 0x57, - 0x3c, 0x50, 0x42, 0x4c, 0x49, 0x5f, 0x3f, 0x4a, 0x42, 0x70, 0x40, 0x40, - 0x4f, 0x46, 0x43, 0x43, 0x4d, 0x6c, 0x41, 0x3e, 0x4e, 0x49, 0x43, 0x38, - 0x50, 0x57, 0x43, 0x39, 0x4a, 0x4f, 0x51, 0x3e, 0x5c, 0x57, 0x46, 0x49, - 0x41, 
0x40, 0x42, 0x4f, 0x4c, 0x45, 0x46, 0x4a, 0x4c, 0x4b, 0x43, 0x42, - 0x4c, 0x3c, 0x47, 0x47, 0x4f, 0x44, 0x45, 0x3a, 0x4d, 0x3d, 0x4d, 0x3f, - 0x46, 0x4f, 0x41, 0x37, 0x46, 0x45, 0x54, 0x47, 0x4e, 0x46, 0x47, 0x23, - 0x48, 0x4e, 0x4a, 0x47, 0x45, 0x45, 0x4e, 0x33, 0x49, 0x4a, 0x4d, 0x4e, - 0x49, 0x46, 0x49, 0x36, 0x48, 0x44, 0x53, 0x44, 0x4a, 0x45, 0x4a, 0x37, - 0x45, 0x36, 0x4b, 0x4e, 0x50, 0x3f, 0x49, 0x38, 0x40, 0x43, 0x46, 0x4c, - 0x43, 0x46, 0x4a, 0x3f, 0x45, 0x3d, 0x44, 0x47, 0x44, 0x42, 0x4a, 0x45, - 0x47, 0x43, 0x4d, 0x4d, 0x44, 0x44, 0x4f, 0x4a, 0x4a, 0x41, 0x50, 0x50, - 0x4b, 0x44, 0x54, 0x5c, 0x4b, 0x3a, 0x46, 0x4a, 0x4a, 0x43, 0x48, 0x5c, - 0x4b, 0x43, 0x47, 0x3d, 0x3e, 0x54, 0x42, 0x47, 0x42, 0x4f, 0x4b, 0x4b, - 0x46, 0x46, 0x46, 0x42, 0x42, 0x4b, 0x48, 0x45, 0x51, 0x4e, 0x49, 0x4d, - 0x43, 0x56, 0x45, 0x40, 0x5a, 0x58, 0x4c, 0x55, 0x40, 0x4b, 0x4c, 0x51, - 0x42, 0x59, 0x43, 0x46, 0x46, 0x69, 0x43, 0x3c, 0x54, 0x47, 0x3d, 0x41, - 0x52, 0x64, 0x44, 0x38, 0x4f, 0x49, 0x3a, 0x3a, 0x55, 0x54, 0x45, 0x3e, - 0x49, 0x44, 0x4e, 0x3f, 0x57, 0x50, 0x47, 0x43, 0x45, 0x48, 0x53, 0x5b, - 0x53, 0x4d, 0x48, 0x4e, 0x48, 0x3a, 0x3e, 0x46, 0x42, 0x36, 0x50, 0x4d, - 0x49, 0x4b, 0x4b, 0x45, 0x4c, 0x44, 0x50, 0x47, 0x3e, 0x49, 0x50, 0x37, - 0x4c, 0x4b, 0x4a, 0x54, 0x4e, 0x43, 0x40, 0x25, 0x46, 0x42, 0x52, 0x3d, - 0x44, 0x45, 0x51, 0x2e, 0x4a, 0x3d, 0x46, 0x46, 0x4c, 0x42, 0x48, 0x34, - 0x44, 0x44, 0x44, 0x4c, 0x4f, 0x4b, 0x42, 0x3d, 0x45, 0x40, 0x47, 0x49, - 0x43, 0x41, 0x3e, 0x39, 0x47, 0x4b, 0x50, 0x4a, 0x46, 0x47, 0x4e, 0x3b, - 0x4e, 0x3e, 0x49, 0x4a, 0x50, 0x40, 0x43, 0x49, 0x48, 0x3c, 0x4f, 0x45, - 0x4a, 0x41, 0x42, 0x48, 0x4b, 0x46, 0x4a, 0x50, 0x40, 0x49, 0x44, 0x54, - 0x45, 0x45, 0x4a, 0x4b, 0x51, 0x51, 0x48, 0x53, 0x50, 0x3f, 0x50, 0x46, - 0x44, 0x45, 0x51, 0x43, 0x4f, 0x3e, 0x41, 0x41, 0x46, 0x45, 0x45, 0x4c, - 0x54, 0x3c, 0x4a, 0x4c, 0x5a, 0x4f, 0x46, 0x4b, 0x47, 0x4a, 0x43, 0x4c, - 0x56, 0x5a, 0x4a, 0x53, 0x4c, 0x49, 0x46, 0x4c, 0x45, 0x59, 0x40, 0x4b, - 0x48, 
0x60, 0x3d, 0x42, 0x52, 0x3f, 0x42, 0x3d, 0x52, 0x5f, 0x46, 0x42, - 0x4b, 0x4e, 0x4a, 0x3d, 0x52, 0x55, 0x53, 0x37, 0x47, 0x3e, 0x4a, 0x42, - 0x51, 0x54, 0x48, 0x48, 0x4b, 0x48, 0x3e, 0x52, 0x41, 0x4e, 0x4c, 0x4f, - 0x43, 0x3b, 0x4b, 0x4b, 0x4c, 0x40, 0x48, 0x49, 0x4d, 0x3a, 0x45, 0x3c, - 0x53, 0x44, 0x48, 0x4d, 0x4b, 0x49, 0x46, 0x3c, 0x4d, 0x40, 0x51, 0x3f, - 0x4c, 0x45, 0x44, 0x2f, 0x49, 0x51, 0x3f, 0x4d, 0x3e, 0x4e, 0x3c, 0x30, - 0x3d, 0x48, 0x4f, 0x3f, 0x45, 0x45, 0x46, 0x3b, 0x4c, 0x46, 0x4d, 0x50, - 0x4c, 0x3d, 0x41, 0x37, 0x3e, 0x3e, 0x4f, 0x4b, 0x4d, 0x4f, 0x45, 0x45, - 0x4a, 0x47, 0x4a, 0x44, 0x43, 0x46, 0x51, 0x41, 0x4e, 0x39, 0x44, 0x4a, - 0x4e, 0x49, 0x4a, 0x42, 0x49, 0x4b, 0x4e, 0x48, 0x49, 0x4a, 0x45, 0x4a, - 0x45, 0x41, 0x4a, 0x4b, 0x42, 0x41, 0x48, 0x4a, 0x44, 0x3a, 0x46, 0x49, - 0x54, 0x45, 0x44, 0x60, 0x4a, 0x4e, 0x45, 0x4a, 0x4a, 0x45, 0x4b, 0x49, - 0x42, 0x44, 0x46, 0x50, 0x4b, 0x4b, 0x4e, 0x45, 0x48, 0x3e, 0x55, 0x42, - 0x51, 0x49, 0x49, 0x44, 0x4e, 0x54, 0x53, 0x49, 0x4c, 0x63, 0x48, 0x5a, - 0x50, 0x4b, 0x45, 0x49, 0x43, 0x57, 0x4c, 0x3f, 0x4d, 0x67, 0x3f, 0x47, - 0x53, 0x49, 0x43, 0x44, 0x49, 0x61, 0x50, 0x47, 0x49, 0x49, 0x4a, 0x42, - 0x4a, 0x51, 0x46, 0x43, 0x3f, 0x34, 0x40, 0x3a, 0x45, 0x54, 0x4c, 0x55, - 0x40, 0x3c, 0x4a, 0x4d, 0x3e, 0x4d, 0x48, 0x51, 0x4c, 0x3e, 0x4c, 0x4f, - 0x50, 0x47, 0x4d, 0x49, 0x4d, 0x4e, 0x45, 0x43, 0x41, 0x41, 0x40, 0x47, - 0x43, 0x4a, 0x4a, 0x3c, 0x4c, 0x3d, 0x4e, 0x43, 0x41, 0x42, 0x4a, 0x30, - 0x45, 0x4c, 0x45, 0x55, 0x46, 0x39, 0x43, 0x39, 0x45, 0x47, 0x48, 0x53, - 0x4a, 0x48, 0x43, 0x38, 0x4f, 0x51, 0x4d, 0x4c, 0x41, 0x46, 0x40, 0x3d, - 0x43, 0x4b, 0x40, 0x46, 0x47, 0x50, 0x4a, 0x43, 0x50, 0x4e, 0x45, 0x4f, - 0x4d, 0x44, 0x4d, 0x3f, 0x4e, 0x48, 0x4a, 0x49, 0x44, 0x3d, 0x4a, 0x44, - 0x40, 0x45, 0x49, 0x40, 0x4a, 0x44, 0x4f, 0x4a, 0x43, 0x4a, 0x4e, 0x52, - 0x4d, 0x50, 0x48, 0x4c, 0x43, 0x45, 0x4d, 0x54, 0x4a, 0x49, 0x4c, 0x58, - 0x4c, 0x48, 0x4c, 0x44, 0x4b, 0x4e, 0x52, 0x44, 0x49, 0x44, 0x47, 0x4e, - 0x4b, 
0x45, 0x49, 0x3e, 0x4c, 0x3b, 0x53, 0x3f, 0x51, 0x41, 0x3f, 0x44, - 0x43, 0x4a, 0x4b, 0x43, 0x53, 0x57, 0x50, 0x53, 0x4f, 0x4b, 0x48, 0x51, - 0x47, 0x49, 0x46, 0x4d, 0x4d, 0x5e, 0x44, 0x46, 0x56, 0x3d, 0x3c, 0x3e, - 0x47, 0x55, 0x54, 0x46, 0x42, 0x49, 0x4f, 0x43, 0x48, 0x54, 0x51, 0x40, - 0x44, 0x44, 0x47, 0x45, 0x4b, 0x59, 0x4d, 0x47, 0x40, 0x39, 0x48, 0x54, - 0x43, 0x45, 0x44, 0x42, 0x4c, 0x3c, 0x4d, 0x42, 0x4b, 0x45, 0x42, 0x48, - 0x51, 0x44, 0x45, 0x3f, 0x3d, 0x49, 0x4b, 0x4a, 0x41, 0x43, 0x4f, 0x3f, - 0x51, 0x4b, 0x44, 0x46, 0x46, 0x44, 0x53, 0x3d, 0x47, 0x47, 0x43, 0x4b, - 0x41, 0x43, 0x3c, 0x3b, 0x49, 0x47, 0x47, 0x49, 0x4b, 0x3d, 0x43, 0x43, - 0x4b, 0x47, 0x45, 0x4e, 0x42, 0x4a, 0x4c, 0x3e, 0x51, 0x3e, 0x46, 0x44, - 0x46, 0x43, 0x42, 0x42, 0x47, 0x4d, 0x51, 0x4b, 0x49, 0x44, 0x4d, 0x40, - 0x50, 0x43, 0x41, 0x4c, 0x42, 0x49, 0x49, 0x4c, 0x42, 0x50, 0x48, 0x3f, - 0x46, 0x42, 0x48, 0x57, 0x49, 0x4d, 0x47, 0x4e, 0x48, 0x4b, 0x46, 0x50, - 0x47, 0x45, 0x52, 0x45, 0x4b, 0x48, 0x40, 0x5b, 0x4e, 0x43, 0x51, 0x48, - 0x48, 0x4a, 0x4a, 0x4a, 0x52, 0x51, 0x4c, 0x4b, 0x42, 0x55, 0x4d, 0x46, - 0x50, 0x40, 0x4a, 0x50, 0x51, 0x3e, 0x42, 0x4c, 0x43, 0x46, 0x4d, 0x46, - 0x46, 0x4d, 0x4d, 0x52, 0x4e, 0x44, 0x45, 0x47, 0x49, 0x4c, 0x41, 0x44, - 0x4d, 0x54, 0x4c, 0x4a, 0x54, 0x3e, 0x44, 0x43, 0x53, 0x55, 0x4b, 0x4a, - 0x47, 0x47, 0x4f, 0x46, 0x4f, 0x4b, 0x51, 0x3f, 0x41, 0x4c, 0x43, 0x46, - 0x55, 0x51, 0x40, 0x4b, 0x4f, 0x40, 0x47, 0x50, 0x4e, 0x4a, 0x46, 0x4e, - 0x42, 0x4d, 0x48, 0x49, 0x48, 0x4a, 0x4a, 0x43, 0x49, 0x48, 0x44, 0x3b, - 0x51, 0x46, 0x3d, 0x43, 0x47, 0x4a, 0x4f, 0x42, 0x4a, 0x50, 0x4f, 0x41, - 0x45, 0x45, 0x43, 0x3c, 0x4c, 0x4c, 0x46, 0x4b, 0x3e, 0x44, 0x4b, 0x3a, - 0x45, 0x50, 0x42, 0x48, 0x46, 0x47, 0x44, 0x3a, 0x53, 0x46, 0x4e, 0x4f, - 0x43, 0x40, 0x46, 0x48, 0x4e, 0x45, 0x3f, 0x47, 0x48, 0x3f, 0x44, 0x4f, - 0x44, 0x47, 0x4e, 0x47, 0x47, 0x49, 0x42, 0x43, 0x3f, 0x49, 0x4a, 0x53, - 0x53, 0x4a, 0x4e, 0x4a, 0x49, 0x4d, 0x49, 0x41, 0x48, 0x4d, 0x4d, 0x4e, - 0x4b, 
0x45, 0x4d, 0x4a, 0x46, 0x4a, 0x46, 0x51, 0x4b, 0x47, 0x49, 0x45, - 0x49, 0x49, 0x4b, 0x5c, 0x48, 0x42, 0x51, 0x4c, 0x41, 0x3f, 0x4c, 0x42, - 0x4f, 0x45, 0x4b, 0x4a, 0x52, 0x48, 0x53, 0x4f, 0x40, 0x47, 0x41, 0x47, - 0x68, 0xfb, 0xff, 0xff, 0x4c, 0xfc, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00, + 0xbc, 0xb3, 0xff, 0xff, 0xc0, 0xb3, 0xff, 0xff, 0x1e, 0xb4, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x80, 0x02, 0x00, 0x00, 0x89, 0xa5, 0xe8, 0xc1, + 0xb1, 0x89, 0x5b, 0xc6, 0x4f, 0x9b, 0xd3, 0x74, 0x93, 0x88, 0xff, 0xaf, + 0x89, 0xff, 0xf4, 0x70, 0xcc, 0x75, 0x78, 0xbf, 0x92, 0xcd, 0xa9, 0xa8, + 0xd6, 0x6a, 0x6f, 0x7b, 0x7f, 0xd8, 0xa8, 0xb1, 0xe6, 0x32, 0x21, 0x70, + 0xa0, 0x9c, 0x6f, 0xc8, 0xc6, 0x59, 0x67, 0x93, 0x97, 0xca, 0x3f, 0xde, + 0xcb, 0x74, 0x7c, 0xb5, 0xa4, 0xd9, 0x66, 0xc6, 0x87, 0x98, 0xa5, 0xd0, + 0xbb, 0xb9, 0xc2, 0xb2, 0xaa, 0x79, 0x25, 0xb9, 0x6d, 0x5a, 0xc8, 0x7f, + 0x70, 0x85, 0x79, 0xbc, 0x6a, 0x9b, 0xd1, 0x9a, 0x9c, 0x51, 0x53, 0x71, + 0x89, 0xc0, 0xb4, 0xac, 0xae, 0x47, 0x67, 0x70, 0x79, 0xd2, 0x81, 0xa5, + 0xd2, 0x09, 0x38, 0x82, 0x74, 0xc9, 0x5d, 0xaf, 0xc1, 0x4f, 0x53, 0x99, + 0xcb, 0xb7, 0x3a, 0xba, 0xe8, 0x7f, 0x76, 0xb9, 0xb3, 0xd3, 0x60, 0xc0, + 0x93, 0x9f, 0x87, 0xbd, 0xd0, 0xb8, 0xca, 0xc1, 0xb6, 0x6c, 0x01, 0xc1, + 0x5c, 0x5d, 0xb2, 0x82, 0x76, 0x77, 0x39, 0xbc, 0x72, 0x6a, 0xc3, 0xb4, + 0x79, 0x21, 0x48, 0x42, 0x86, 0xa6, 0xbd, 0xaf, 0xae, 0x23, 0x9c, 0x69, + 0x78, 0xc3, 0x6b, 0xb3, 0xab, 0x43, 0xb2, 0x88, 0x71, 0xc6, 0x6b, 0xbe, + 0xc3, 0x75, 0xc2, 0xc3, 0xa5, 0xcf, 0x32, 0xbe, 0xcb, 0xb0, 0xb8, 0xc1, + 0x9c, 0xcf, 0x64, 0xc4, 0xb4, 0x96, 0xa8, 0xb9, 0xcb, 0xc0, 0xc0, 0xb8, + 0xb8, 0x77, 0x65, 0xc0, 0xc4, 0xb3, 0xc5, 0x77, 0x9b, 0x61, 0xd4, 0xac, + 0x7e, 0x36, 0xb1, 0xae, 0x36, 0x36, 0xb8, 0x39, 0x6b, 0x70, 0x9c, 0xb5, + 0x88, 0x5c, 0xb3, 0x6a, 0xad, 0xc5, 0x7b, 0xb4, 0xad, 0xaa, 0xc4, 0x84, + 0x5e, 0xc4, 0x67, 0xc1, 0xde, 0xba, 0xcf, 0xbd, 0xa0, 0xd3, 0x35, 0xb3, + 0xe7, 0xc8, 0xb8, 0xb8, 0xaf, 0xb4, 0x59, 0xb8, 0xb4, 0xac, 0xac, 0xaa, + 0xc7, 
0xad, 0xc8, 0xb6, 0xac, 0x99, 0xa0, 0xcb, 0xc1, 0xc8, 0xcb, 0x89, + 0xc3, 0xac, 0xca, 0x8b, 0x97, 0x1f, 0xbd, 0xbf, 0x13, 0xad, 0xc8, 0x41, + 0x56, 0x3c, 0x86, 0xb2, 0x61, 0xc4, 0xbb, 0x71, 0xba, 0x92, 0x8d, 0xc3, + 0x86, 0xcb, 0xc5, 0x8d, 0x88, 0xc8, 0x6a, 0xbf, 0x9c, 0xcd, 0xcd, 0xc0, + 0x81, 0xb1, 0x47, 0xb5, 0xf0, 0xce, 0xb1, 0xc1, 0xaa, 0xa8, 0x54, 0xcb, + 0xbc, 0xc7, 0xc5, 0x8e, 0xc3, 0xce, 0xc7, 0xb9, 0xb9, 0xa1, 0xc5, 0xbd, + 0xb8, 0xb8, 0xb7, 0x81, 0xb6, 0xba, 0xd2, 0x90, 0xbc, 0x96, 0xbe, 0xba, + 0x53, 0xb5, 0xc7, 0x3c, 0x3c, 0x1f, 0x90, 0xaa, 0x5a, 0xb8, 0xba, 0x7e, + 0xbc, 0x9e, 0xc2, 0xb1, 0x6e, 0xc0, 0xc4, 0x91, 0xf0, 0xb5, 0x60, 0xad, + 0x73, 0xba, 0xcd, 0xba, 0x6e, 0x94, 0x39, 0xb5, 0xe4, 0xbe, 0xb4, 0xb5, + 0xa0, 0xa9, 0x51, 0xac, 0xbc, 0xc2, 0xb3, 0x8a, 0xbd, 0x9a, 0xca, 0xb3, + 0xbf, 0xaf, 0xb5, 0x9a, 0xb9, 0xc3, 0xb6, 0x92, 0xb5, 0xc1, 0xb0, 0x95, + 0xd6, 0xcc, 0xbb, 0xbb, 0xa9, 0xb9, 0xac, 0x4a, 0x62, 0x27, 0xa7, 0xa7, + 0x30, 0xbd, 0xb1, 0x73, 0xa1, 0x74, 0xc2, 0xb7, 0x58, 0xc0, 0xae, 0x8f, + 0xe1, 0xac, 0x4e, 0xb0, 0x55, 0xc9, 0xc8, 0x9f, 0x83, 0x8e, 0x3e, 0xd5, + 0xb5, 0xbe, 0xcd, 0xb2, 0xa6, 0xc8, 0x64, 0xac, 0xc0, 0xc8, 0xaf, 0x99, + 0xc5, 0x9e, 0xb8, 0xbd, 0xa9, 0xc2, 0xb3, 0x81, 0xb4, 0xc2, 0xb4, 0x8f, + 0xbc, 0xb8, 0x9c, 0x88, 0xbe, 0xc6, 0xbf, 0xba, 0xc8, 0xb4, 0xab, 0x5b, + 0x92, 0x51, 0xb1, 0x9a, 0x44, 0xb9, 0xab, 0x80, 0xa5, 0x3e, 0xc0, 0xa5, + 0x5c, 0xb6, 0xa8, 0xa2, 0xb3, 0x9a, 0x6b, 0xb3, 0x34, 0xc6, 0x7e, 0x96, + 0xcb, 0x88, 0x48, 0xc6, 0xa3, 0xbb, 0xd2, 0xa2, 0xaf, 0xd0, 0x6e, 0xae, + 0xb4, 0xce, 0xc8, 0x8f, 0xd7, 0xad, 0xc8, 0xb0, 0xae, 0xb7, 0xb2, 0x70, + 0xb9, 0xad, 0xc1, 0xa0, 0xcb, 0xa2, 0xb0, 0x9b, 0xbe, 0xd3, 0xca, 0xb6, + 0xbd, 0xaf, 0xa9, 0x82, 0xa1, 0xd7, 0xbc, 0x9b, 0x8b, 0xac, 0xaa, 0xac, + 0xad, 0x37, 0xb7, 0xb6, 0x46, 0xae, 0xa9, 0xbd, 0x6b, 0x90, 0x5e, 0xcd, + 0x23, 0xa4, 0x76, 0xa1, 0xc4, 0x96, 0x50, 0xcc, 0x95, 0x99, 0x93, 0xa7, + 0xb2, 0xe1, 0x7c, 0xbd, 0xbd, 0xb5, 0xbf, 0x9a, 0xca, 0x80, 0xd7, 0xae, + 0x79, 
0xa8, 0xaa, 0xb2, 0xbc, 0x51, 0xda, 0xa3, 0x80, 0x8b, 0xa2, 0xc8, + 0xd1, 0x94, 0xe1, 0xc4, 0xbd, 0xae, 0xae, 0xcc, 0xb3, 0xca, 0xd5, 0xa1, + 0xd5, 0xa7, 0xaf, 0xd2, 0xb4, 0x8d, 0xcc, 0xc8, 0x63, 0xa3, 0xa4, 0xdf, + 0x6f, 0x7e, 0x98, 0xdf, 0x1b, 0x7b, 0x43, 0x99, 0xb0, 0x99, 0x71, 0xdb, + 0x63, 0x7b, 0x69, 0x9c, 0xba, 0xcd, 0x90, 0xd0, 0xb6, 0xa6, 0x9e, 0x95, + 0x50, 0xb6, 0xff, 0xff, 0xae, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc7, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, + 0xda, 0xb6, 0xff, 0xff, 0x04, 0x00, 0x00, 0x00, 0xc0, 0x44, 0x00, 0x00, + 0x2c, 0x30, 0x38, 0x5a, 0x3d, 0x4c, 0x44, 0x3b, 0x48, 0x48, 0x44, 0x57, + 0x3f, 0x43, 0x45, 0x3a, 0x24, 0x32, 0x21, 0x5c, 0x3f, 0x3a, 0x38, 0x3a, + 0x35, 0x35, 0x2f, 0x51, 0x3c, 0x3a, 0x45, 0x3a, 0x3b, 0x41, 0x39, 0x55, + 0x3c, 0x41, 0x39, 0x44, 0x3a, 0x40, 0x37, 0x48, 0x33, 0x47, 0x36, 0x3e, + 0x3c, 0x41, 0x3f, 0x3e, 0x3e, 0x47, 0x36, 0x3e, 0x41, 0x33, 0x3e, 0x3b, + 0x3a, 0x46, 0x45, 0x40, 0x48, 0x3a, 0x35, 0x4b, 0x45, 0x4d, 0x3c, 0x49, + 0x42, 0x44, 0x3c, 0x4c, 0x3e, 0x3c, 0x44, 0x32, 0x33, 0x41, 0x36, 0x4b, + 0x38, 0x3b, 0x3c, 0x38, 0x3b, 0x45, 0x34, 0x46, 0x40, 0x4e, 0x44, 0x35, + 0x43, 0x36, 0x3d, 0x40, 0x3e, 0x48, 0x40, 0x34, 0x3a, 0x46, 0x45, 0x43, + 0x45, 0x3f, 0x47, 0x37, 0x36, 0x35, 0x44, 0x3a, 0x3e, 0x37, 0x39, 0x40, + 0x3a, 0x3f, 0x3f, 0x4c, 0x3e, 0x41, 0x43, 0x35, 0x3f, 0x3d, 0x3d, 0x4c, + 0x3c, 0x4a, 0x46, 0x3c, 0x3a, 0x41, 0x40, 0x4e, 0x36, 0x47, 0x40, 0x3b, + 0x47, 0x42, 0x38, 0x4d, 0x48, 0x47, 0x3c, 0x3c, 0x33, 0x3b, 0x3e, 0x42, + 0x3f, 0x3e, 0x3a, 0x3d, 0x32, 0x39, 0x41, 0x46, 0x3a, 0x3a, 0x3e, 0x3e, + 0x47, 0x48, 0x4e, 0x36, 0x44, 0x40, 0x41, 0x45, 0x3a, 0x3c, 0x38, 0x55, + 0x2e, 0x26, 0x2f, 0x32, 0x3f, 0x41, 0x3e, 0x4c, 0x45, 0x36, 0x40, 0x31, + 0x17, 0x2e, 0x14, 0x53, 0x34, 0x30, 0x34, 0x3f, 0x2e, 0x44, 0x2b, 0x4e, + 0x34, 
0x3e, 0x34, 0x43, 0x3d, 0x35, 0x3f, 0x46, 0x39, 0x40, 0x38, 0x3e, + 0x35, 0x3b, 0x35, 0x45, 0x3d, 0x40, 0x38, 0x37, 0x40, 0x3e, 0x32, 0x3e, + 0x41, 0x39, 0x30, 0x41, 0x3a, 0x32, 0x3e, 0x3d, 0x39, 0x31, 0x33, 0x3e, + 0x41, 0x47, 0x40, 0x47, 0x35, 0x33, 0x3c, 0x32, 0x40, 0x3c, 0x42, 0x49, + 0x34, 0x38, 0x39, 0x37, 0x39, 0x35, 0x40, 0x4d, 0x37, 0x43, 0x42, 0x3e, + 0x3f, 0x3c, 0x3e, 0x51, 0x36, 0x37, 0x42, 0x41, 0x36, 0x31, 0x43, 0x3d, + 0x46, 0x43, 0x37, 0x46, 0x32, 0x45, 0x42, 0x36, 0x3f, 0x42, 0x42, 0x41, + 0x3d, 0x46, 0x39, 0x41, 0x3c, 0x3f, 0x38, 0x3c, 0x43, 0x43, 0x3d, 0x3c, + 0x3d, 0x41, 0x38, 0x42, 0x3a, 0x3d, 0x43, 0x42, 0x41, 0x40, 0x39, 0x36, + 0x3a, 0x3c, 0x3c, 0x4f, 0x44, 0x36, 0x39, 0x35, 0x46, 0x46, 0x36, 0x4a, + 0x3a, 0x42, 0x43, 0x39, 0x3f, 0x3d, 0x3c, 0x47, 0x38, 0x3f, 0x43, 0x40, + 0x36, 0x3c, 0x45, 0x3b, 0x33, 0x36, 0x3b, 0x39, 0x3c, 0x35, 0x40, 0x38, + 0x40, 0x3e, 0x3f, 0x48, 0x3f, 0x34, 0x40, 0x53, 0x26, 0x2c, 0x29, 0x39, + 0x2a, 0x38, 0x3f, 0x45, 0x32, 0x31, 0x4a, 0x37, 0x1c, 0x28, 0x09, 0x43, + 0x35, 0x3b, 0x33, 0x3c, 0x32, 0x3f, 0x28, 0x41, 0x36, 0x35, 0x3a, 0x37, + 0x41, 0x39, 0x32, 0x3c, 0x40, 0x3c, 0x3c, 0x32, 0x38, 0x39, 0x37, 0x44, + 0x3a, 0x33, 0x41, 0x36, 0x37, 0x3c, 0x35, 0x3a, 0x3d, 0x30, 0x3d, 0x41, + 0x37, 0x3c, 0x45, 0x3a, 0x37, 0x2f, 0x36, 0x3c, 0x3a, 0x3d, 0x39, 0x48, + 0x46, 0x33, 0x3a, 0x3e, 0x40, 0x3d, 0x3b, 0x52, 0x38, 0x45, 0x34, 0x47, + 0x39, 0x36, 0x37, 0x56, 0x42, 0x3f, 0x33, 0x36, 0x38, 0x3f, 0x40, 0x53, + 0x3e, 0x37, 0x3d, 0x3c, 0x48, 0x3a, 0x3d, 0x33, 0x39, 0x40, 0x3e, 0x35, + 0x3d, 0x46, 0x38, 0x36, 0x37, 0x43, 0x3a, 0x3c, 0x40, 0x38, 0x39, 0x3b, + 0x39, 0x3a, 0x42, 0x3d, 0x34, 0x3f, 0x35, 0x43, 0x3a, 0x35, 0x46, 0x3a, + 0x48, 0x38, 0x3b, 0x48, 0x3c, 0x35, 0x42, 0x3d, 0x3a, 0x3d, 0x38, 0x42, + 0x3e, 0x3c, 0x33, 0x39, 0x34, 0x30, 0x42, 0x44, 0x41, 0x3d, 0x3c, 0x39, + 0x3c, 0x3a, 0x39, 0x41, 0x3d, 0x44, 0x3c, 0x40, 0x3f, 0x3e, 0x42, 0x3f, + 0x37, 0x40, 0x39, 0x3b, 0x42, 0x43, 0x49, 0x37, 0x39, 0x46, 0x35, 0x3c, + 0x3e, 
0x39, 0x45, 0x52, 0x24, 0x2d, 0x38, 0x35, 0x3a, 0x3a, 0x3c, 0x44, + 0x39, 0x32, 0x51, 0x3f, 0x16, 0x34, 0x0a, 0x49, 0x39, 0x38, 0x39, 0x3e, + 0x2f, 0x36, 0x24, 0x3f, 0x37, 0x34, 0x38, 0x3b, 0x34, 0x34, 0x30, 0x3b, + 0x3d, 0x36, 0x35, 0x42, 0x33, 0x40, 0x37, 0x35, 0x43, 0x3f, 0x3f, 0x39, + 0x3a, 0x43, 0x36, 0x3e, 0x39, 0x3d, 0x3f, 0x3d, 0x47, 0x3b, 0x39, 0x37, + 0x35, 0x42, 0x3f, 0x3b, 0x41, 0x3a, 0x42, 0x4b, 0x3d, 0x3f, 0x3d, 0x3e, + 0x38, 0x3b, 0x34, 0x4e, 0x3f, 0x39, 0x36, 0x43, 0x39, 0x35, 0x41, 0x4d, + 0x3c, 0x39, 0x43, 0x33, 0x37, 0x3b, 0x41, 0x48, 0x3c, 0x3f, 0x39, 0x32, + 0x35, 0x3d, 0x42, 0x35, 0x3d, 0x3e, 0x37, 0x3b, 0x38, 0x3a, 0x44, 0x36, + 0x42, 0x35, 0x48, 0x40, 0x3a, 0x44, 0x44, 0x39, 0x43, 0x41, 0x3c, 0x37, + 0x47, 0x3b, 0x42, 0x42, 0x45, 0x3a, 0x40, 0x46, 0x35, 0x3f, 0x3a, 0x48, + 0x35, 0x44, 0x3f, 0x37, 0x33, 0x3e, 0x45, 0x49, 0x39, 0x43, 0x47, 0x37, + 0x3f, 0x3f, 0x3b, 0x44, 0x38, 0x3d, 0x39, 0x42, 0x37, 0x3e, 0x40, 0x45, + 0x3b, 0x3f, 0x40, 0x34, 0x42, 0x3f, 0x43, 0x3c, 0x43, 0x41, 0x38, 0x38, + 0x38, 0x41, 0x55, 0x33, 0x33, 0x39, 0x39, 0x3c, 0x35, 0x39, 0x38, 0x42, + 0x27, 0x26, 0x32, 0x41, 0x41, 0x32, 0x3f, 0x47, 0x3a, 0x38, 0x48, 0x37, + 0x11, 0x27, 0x08, 0x49, 0x35, 0x42, 0x3c, 0x2e, 0x34, 0x43, 0x25, 0x3b, + 0x3a, 0x33, 0x37, 0x30, 0x3c, 0x36, 0x2d, 0x3c, 0x3b, 0x39, 0x3b, 0x40, + 0x46, 0x3a, 0x30, 0x42, 0x35, 0x32, 0x36, 0x3a, 0x3a, 0x34, 0x34, 0x33, + 0x3d, 0x30, 0x3b, 0x42, 0x41, 0x3f, 0x3d, 0x3b, 0x44, 0x3d, 0x41, 0x41, + 0x3d, 0x3f, 0x40, 0x51, 0x42, 0x42, 0x36, 0x45, 0x30, 0x40, 0x32, 0x4f, + 0x3a, 0x3c, 0x40, 0x39, 0x3d, 0x3b, 0x3e, 0x4b, 0x3d, 0x37, 0x42, 0x46, + 0x40, 0x40, 0x47, 0x3d, 0x35, 0x3c, 0x3f, 0x46, 0x37, 0x37, 0x3a, 0x2e, + 0x3d, 0x3c, 0x3a, 0x46, 0x3a, 0x44, 0x3c, 0x3a, 0x32, 0x44, 0x31, 0x41, + 0x43, 0x36, 0x49, 0x39, 0x3d, 0x37, 0x3f, 0x41, 0x3b, 0x3b, 0x3c, 0x42, + 0x3c, 0x34, 0x3f, 0x3b, 0x40, 0x3e, 0x48, 0x47, 0x3e, 0x3c, 0x38, 0x39, + 0x3f, 0x35, 0x39, 0x3f, 0x3e, 0x3e, 0x3b, 0x43, 0x41, 0x40, 0x43, 0x41, + 0x3f, 
0x37, 0x39, 0x41, 0x46, 0x32, 0x3d, 0x41, 0x36, 0x3f, 0x3e, 0x3f, + 0x36, 0x48, 0x43, 0x3d, 0x43, 0x3f, 0x34, 0x3d, 0x34, 0x35, 0x4f, 0x32, + 0x3c, 0x3f, 0x3d, 0x3f, 0x39, 0x3c, 0x3d, 0x47, 0x23, 0x36, 0x33, 0x45, + 0x37, 0x2e, 0x42, 0x42, 0x39, 0x34, 0x4f, 0x3f, 0x19, 0x2b, 0x01, 0x50, + 0x35, 0x3f, 0x37, 0x3c, 0x33, 0x35, 0x25, 0x32, 0x38, 0x3e, 0x40, 0x40, + 0x2f, 0x38, 0x35, 0x3d, 0x31, 0x42, 0x44, 0x3c, 0x3a, 0x3d, 0x2d, 0x3e, + 0x3b, 0x3e, 0x3d, 0x31, 0x3b, 0x37, 0x35, 0x31, 0x36, 0x35, 0x34, 0x31, + 0x41, 0x3a, 0x33, 0x32, 0x3c, 0x31, 0x3e, 0x3d, 0x40, 0x3b, 0x34, 0x45, + 0x36, 0x39, 0x3e, 0x3f, 0x3c, 0x45, 0x37, 0x4b, 0x42, 0x3d, 0x33, 0x43, + 0x3e, 0x40, 0x35, 0x4e, 0x38, 0x36, 0x3a, 0x33, 0x38, 0x44, 0x3f, 0x3c, + 0x3f, 0x40, 0x3a, 0x3c, 0x3c, 0x3c, 0x44, 0x29, 0x3a, 0x40, 0x35, 0x3a, + 0x3d, 0x48, 0x3b, 0x30, 0x45, 0x41, 0x45, 0x40, 0x37, 0x32, 0x3a, 0x35, + 0x3f, 0x38, 0x3b, 0x43, 0x3b, 0x3f, 0x33, 0x40, 0x3b, 0x40, 0x38, 0x33, + 0x39, 0x3c, 0x3c, 0x3f, 0x43, 0x33, 0x43, 0x40, 0x43, 0x3d, 0x33, 0x42, + 0x40, 0x32, 0x3e, 0x36, 0x40, 0x38, 0x43, 0x40, 0x44, 0x38, 0x34, 0x3c, + 0x3e, 0x39, 0x47, 0x43, 0x40, 0x3b, 0x3f, 0x3f, 0x3c, 0x3b, 0x4b, 0x33, + 0x36, 0x49, 0x32, 0x41, 0x48, 0x45, 0x57, 0x3a, 0x40, 0x42, 0x40, 0x46, + 0x36, 0x35, 0x3c, 0x46, 0x22, 0x2e, 0x33, 0x3e, 0x3c, 0x39, 0x44, 0x4d, + 0x3f, 0x41, 0x51, 0x44, 0x15, 0x2e, 0x02, 0x4e, 0x39, 0x3a, 0x3c, 0x35, + 0x30, 0x38, 0x1e, 0x31, 0x40, 0x3b, 0x39, 0x3d, 0x3a, 0x37, 0x35, 0x36, + 0x46, 0x36, 0x3c, 0x3e, 0x39, 0x3e, 0x32, 0x40, 0x3b, 0x35, 0x42, 0x41, + 0x41, 0x38, 0x41, 0x35, 0x42, 0x36, 0x3c, 0x42, 0x3d, 0x41, 0x35, 0x31, + 0x3f, 0x44, 0x3e, 0x41, 0x3f, 0x35, 0x42, 0x4b, 0x3e, 0x36, 0x37, 0x34, + 0x36, 0x3d, 0x40, 0x49, 0x41, 0x3e, 0x3d, 0x3b, 0x38, 0x37, 0x40, 0x47, + 0x35, 0x32, 0x43, 0x38, 0x36, 0x3b, 0x33, 0x47, 0x33, 0x34, 0x3d, 0x47, + 0x3c, 0x37, 0x3d, 0x2b, 0x3a, 0x36, 0x3b, 0x3d, 0x43, 0x38, 0x35, 0x32, + 0x32, 0x37, 0x43, 0x36, 0x3f, 0x48, 0x38, 0x30, 0x3a, 0x3c, 0x42, 0x34, + 0x37, 
0x3c, 0x37, 0x40, 0x48, 0x3e, 0x35, 0x3b, 0x3f, 0x38, 0x39, 0x3e, + 0x37, 0x35, 0x36, 0x3d, 0x3b, 0x3c, 0x40, 0x3d, 0x34, 0x40, 0x46, 0x42, + 0x3f, 0x3c, 0x3c, 0x3e, 0x40, 0x40, 0x3d, 0x3f, 0x3f, 0x44, 0x46, 0x41, + 0x32, 0x43, 0x40, 0x41, 0x3c, 0x42, 0x39, 0x38, 0x48, 0x44, 0x3d, 0x38, + 0x34, 0x40, 0x4e, 0x31, 0x3c, 0x42, 0x39, 0x48, 0x3c, 0x33, 0x3e, 0x40, + 0x20, 0x27, 0x39, 0x45, 0x45, 0x36, 0x47, 0x4c, 0x35, 0x3e, 0x4a, 0x36, + 0x16, 0x2f, 0x04, 0x4f, 0x3a, 0x35, 0x36, 0x3a, 0x2d, 0x36, 0x21, 0x34, + 0x3b, 0x32, 0x3d, 0x3c, 0x3c, 0x3f, 0x3b, 0x3b, 0x41, 0x46, 0x40, 0x3d, + 0x3b, 0x44, 0x33, 0x42, 0x34, 0x33, 0x3e, 0x45, 0x3f, 0x46, 0x39, 0x33, + 0x3b, 0x37, 0x37, 0x37, 0x42, 0x47, 0x3c, 0x35, 0x31, 0x41, 0x44, 0x3a, + 0x3b, 0x33, 0x39, 0x44, 0x42, 0x33, 0x3d, 0x3f, 0x43, 0x33, 0x41, 0x4a, + 0x35, 0x46, 0x36, 0x3e, 0x39, 0x41, 0x41, 0x4c, 0x34, 0x3d, 0x38, 0x33, + 0x3c, 0x3f, 0x43, 0x44, 0x37, 0x35, 0x35, 0x3c, 0x43, 0x34, 0x3e, 0x2d, + 0x3f, 0x35, 0x38, 0x3c, 0x33, 0x35, 0x43, 0x2a, 0x40, 0x33, 0x34, 0x40, + 0x3d, 0x38, 0x36, 0x2d, 0x36, 0x3c, 0x43, 0x3d, 0x37, 0x3d, 0x39, 0x38, + 0x3b, 0x3e, 0x3c, 0x46, 0x35, 0x35, 0x43, 0x44, 0x39, 0x40, 0x34, 0x39, + 0x3d, 0x34, 0x40, 0x45, 0x38, 0x35, 0x3e, 0x39, 0x3c, 0x44, 0x48, 0x44, + 0x41, 0x3e, 0x3c, 0x45, 0x3a, 0x3c, 0x3c, 0x46, 0x3a, 0x40, 0x39, 0x43, + 0x35, 0x35, 0x3e, 0x45, 0x3a, 0x34, 0x3c, 0x39, 0x46, 0x3a, 0x4f, 0x35, + 0x32, 0x3d, 0x36, 0x41, 0x32, 0x38, 0x3f, 0x45, 0x2d, 0x34, 0x2a, 0x35, + 0x43, 0x3f, 0x41, 0x49, 0x41, 0x3c, 0x4b, 0x3f, 0x17, 0x31, 0x02, 0x4f, + 0x30, 0x38, 0x39, 0x40, 0x33, 0x3a, 0x25, 0x38, 0x35, 0x3c, 0x39, 0x35, + 0x34, 0x41, 0x34, 0x43, 0x40, 0x40, 0x46, 0x3d, 0x40, 0x38, 0x3f, 0x3b, + 0x35, 0x39, 0x3c, 0x39, 0x34, 0x38, 0x3f, 0x36, 0x3a, 0x38, 0x44, 0x3f, + 0x3f, 0x38, 0x3c, 0x33, 0x41, 0x42, 0x38, 0x33, 0x3c, 0x3b, 0x3c, 0x46, + 0x38, 0x3b, 0x3f, 0x33, 0x3f, 0x48, 0x3b, 0x49, 0x3f, 0x3a, 0x3d, 0x3f, + 0x47, 0x3d, 0x30, 0x45, 0x36, 0x42, 0x3d, 0x36, 0x43, 0x38, 0x3b, 0x3d, + 0x3c, 
0x30, 0x3b, 0x43, 0x3d, 0x41, 0x34, 0x2e, 0x43, 0x3d, 0x43, 0x46, + 0x43, 0x3c, 0x3c, 0x2e, 0x3c, 0x43, 0x34, 0x43, 0x3e, 0x43, 0x3f, 0x2b, + 0x45, 0x40, 0x3a, 0x43, 0x36, 0x39, 0x3f, 0x3d, 0x3a, 0x3c, 0x35, 0x3b, + 0x36, 0x3f, 0x45, 0x3e, 0x45, 0x40, 0x3f, 0x36, 0x45, 0x42, 0x35, 0x3e, + 0x3a, 0x3a, 0x3f, 0x40, 0x3e, 0x3c, 0x39, 0x46, 0x43, 0x3e, 0x3f, 0x3f, + 0x40, 0x3c, 0x40, 0x4b, 0x41, 0x35, 0x3b, 0x3e, 0x49, 0x32, 0x3e, 0x41, + 0x31, 0x37, 0x3d, 0x3b, 0x3f, 0x45, 0x50, 0x3a, 0x3f, 0x3c, 0x44, 0x36, + 0x43, 0x37, 0x3d, 0x4b, 0x29, 0x39, 0x2f, 0x38, 0x45, 0x36, 0x40, 0x4e, + 0x39, 0x3f, 0x48, 0x43, 0x23, 0x3c, 0x06, 0x51, 0x37, 0x3b, 0x3e, 0x3b, + 0x28, 0x45, 0x2b, 0x37, 0x3f, 0x33, 0x3f, 0x41, 0x31, 0x36, 0x33, 0x3a, + 0x3a, 0x35, 0x3b, 0x33, 0x3e, 0x36, 0x35, 0x40, 0x3a, 0x34, 0x3a, 0x38, + 0x34, 0x3a, 0x3a, 0x34, 0x42, 0x45, 0x40, 0x3e, 0x40, 0x38, 0x39, 0x34, + 0x38, 0x37, 0x3f, 0x3e, 0x3c, 0x32, 0x3f, 0x46, 0x3f, 0x44, 0x3b, 0x3e, + 0x44, 0x45, 0x36, 0x3e, 0x36, 0x3f, 0x3b, 0x40, 0x39, 0x34, 0x38, 0x41, + 0x42, 0x3e, 0x3d, 0x47, 0x3e, 0x45, 0x33, 0x40, 0x3e, 0x3a, 0x44, 0x3d, + 0x3c, 0x3a, 0x3a, 0x2c, 0x3a, 0x3d, 0x35, 0x45, 0x3c, 0x41, 0x36, 0x30, + 0x32, 0x32, 0x3a, 0x3b, 0x35, 0x3c, 0x43, 0x2d, 0x35, 0x3f, 0x41, 0x37, + 0x3f, 0x46, 0x34, 0x39, 0x3c, 0x43, 0x40, 0x3e, 0x3e, 0x36, 0x3e, 0x3c, + 0x37, 0x3a, 0x3d, 0x3a, 0x3c, 0x38, 0x44, 0x41, 0x3f, 0x3b, 0x3c, 0x47, + 0x40, 0x3b, 0x41, 0x47, 0x3e, 0x45, 0x39, 0x3e, 0x37, 0x45, 0x4b, 0x4c, + 0x37, 0x37, 0x37, 0x3c, 0x3c, 0x3d, 0x40, 0x38, 0x39, 0x3e, 0x43, 0x3f, + 0x38, 0x45, 0x51, 0x3c, 0x31, 0x34, 0x3b, 0x48, 0x46, 0x41, 0x40, 0x40, + 0x2c, 0x39, 0x32, 0x42, 0x3c, 0x2e, 0x49, 0x4d, 0x3c, 0x3f, 0x45, 0x38, + 0x20, 0x38, 0x03, 0x55, 0x33, 0x3e, 0x32, 0x39, 0x32, 0x3b, 0x24, 0x2b, + 0x42, 0x35, 0x45, 0x32, 0x2e, 0x3b, 0x2f, 0x3f, 0x3c, 0x37, 0x39, 0x3b, + 0x34, 0x34, 0x3d, 0x36, 0x3d, 0x39, 0x3b, 0x30, 0x3c, 0x3e, 0x40, 0x32, + 0x3d, 0x3c, 0x3c, 0x3e, 0x33, 0x33, 0x3f, 0x3a, 0x33, 0x3e, 0x46, 0x36, + 0x3a, 
0x3d, 0x40, 0x40, 0x3f, 0x41, 0x3a, 0x42, 0x34, 0x32, 0x34, 0x46, + 0x3b, 0x31, 0x40, 0x37, 0x37, 0x32, 0x3e, 0x47, 0x3f, 0x3b, 0x3e, 0x43, + 0x49, 0x45, 0x3a, 0x3d, 0x3e, 0x44, 0x40, 0x31, 0x39, 0x3e, 0x3b, 0x2d, + 0x3b, 0x3a, 0x33, 0x3d, 0x39, 0x37, 0x3e, 0x32, 0x41, 0x3c, 0x3a, 0x37, + 0x3b, 0x40, 0x39, 0x2f, 0x3e, 0x3f, 0x47, 0x32, 0x3e, 0x3b, 0x3e, 0x3e, + 0x40, 0x3e, 0x40, 0x3c, 0x41, 0x39, 0x38, 0x46, 0x45, 0x32, 0x47, 0x31, + 0x36, 0x47, 0x37, 0x49, 0x3a, 0x3f, 0x47, 0x3a, 0x41, 0x3b, 0x3c, 0x4f, + 0x3e, 0x36, 0x3b, 0x47, 0x35, 0x39, 0x41, 0x4e, 0x3d, 0x3e, 0x3b, 0x46, + 0x38, 0x39, 0x3b, 0x45, 0x3e, 0x3f, 0x44, 0x42, 0x44, 0x3f, 0x55, 0x3b, + 0x41, 0x3d, 0x43, 0x43, 0x37, 0x3f, 0x3d, 0x4c, 0x28, 0x3d, 0x36, 0x3c, + 0x3e, 0x3e, 0x48, 0x50, 0x3e, 0x39, 0x45, 0x41, 0x22, 0x37, 0x07, 0x4f, + 0x2e, 0x33, 0x38, 0x3f, 0x31, 0x3a, 0x1b, 0x36, 0x34, 0x38, 0x3c, 0x37, + 0x37, 0x3e, 0x36, 0x35, 0x36, 0x3b, 0x3d, 0x38, 0x42, 0x48, 0x3d, 0x40, + 0x40, 0x44, 0x3d, 0x39, 0x37, 0x3b, 0x3d, 0x33, 0x3d, 0x35, 0x42, 0x3c, + 0x39, 0x3e, 0x43, 0x2d, 0x3c, 0x40, 0x43, 0x43, 0x45, 0x35, 0x3c, 0x44, + 0x34, 0x3c, 0x3d, 0x31, 0x39, 0x40, 0x39, 0x3d, 0x3e, 0x34, 0x3e, 0x3b, + 0x40, 0x38, 0x42, 0x4a, 0x40, 0x3b, 0x35, 0x3d, 0x36, 0x38, 0x35, 0x42, + 0x3c, 0x3c, 0x3d, 0x3b, 0x38, 0x39, 0x45, 0x28, 0x3a, 0x37, 0x37, 0x35, + 0x3a, 0x3d, 0x35, 0x2a, 0x3c, 0x3f, 0x37, 0x34, 0x37, 0x3f, 0x3e, 0x2b, + 0x39, 0x43, 0x3b, 0x45, 0x35, 0x36, 0x36, 0x42, 0x33, 0x38, 0x3b, 0x35, + 0x31, 0x3f, 0x41, 0x41, 0x3c, 0x41, 0x45, 0x42, 0x3b, 0x3c, 0x39, 0x46, + 0x3c, 0x3e, 0x3a, 0x41, 0x39, 0x3d, 0x41, 0x4b, 0x40, 0x3f, 0x43, 0x3d, + 0x39, 0x39, 0x44, 0x44, 0x37, 0x42, 0x3f, 0x44, 0x3e, 0x37, 0x42, 0x35, + 0x44, 0x3f, 0x40, 0x42, 0x3f, 0x3a, 0x47, 0x3d, 0x38, 0x3a, 0x3b, 0x3a, + 0x42, 0x36, 0x3a, 0x97, 0x32, 0x31, 0x30, 0x36, 0x47, 0x3e, 0x46, 0x51, + 0x42, 0x34, 0x50, 0x34, 0x26, 0x3b, 0x06, 0x55, 0x3c, 0x3b, 0x2d, 0x3a, + 0x37, 0x37, 0x1b, 0x32, 0x39, 0x3d, 0x36, 0x40, 0x3b, 0x3f, 0x33, 0x33, + 0x3d, 
0x37, 0x35, 0x37, 0x44, 0x3f, 0x35, 0x39, 0x33, 0x3c, 0x43, 0x39, + 0x3f, 0x42, 0x3e, 0x34, 0x38, 0x38, 0x39, 0x3c, 0x48, 0x3c, 0x2f, 0x30, + 0x40, 0x3c, 0x41, 0x3e, 0x3f, 0x3e, 0x36, 0x43, 0x40, 0x3c, 0x36, 0x43, + 0x43, 0x38, 0x3a, 0x47, 0x3e, 0x37, 0x39, 0x3a, 0x43, 0x45, 0x38, 0x43, + 0x3b, 0x45, 0x37, 0x44, 0x36, 0x45, 0x3a, 0x3e, 0x3e, 0x3e, 0x3d, 0x33, + 0x39, 0x36, 0x48, 0x33, 0x30, 0x42, 0x33, 0x39, 0x37, 0x3a, 0x3f, 0x34, + 0x34, 0x40, 0x40, 0x40, 0x3f, 0x3d, 0x3f, 0x33, 0x41, 0x40, 0x3b, 0x43, + 0x3b, 0x3a, 0x40, 0x3a, 0x38, 0x3e, 0x38, 0x3b, 0x38, 0x42, 0x40, 0x40, + 0x41, 0x35, 0x37, 0x38, 0x3b, 0x3c, 0x39, 0x4b, 0x32, 0x39, 0x42, 0x3c, + 0x36, 0x3d, 0x32, 0x52, 0x3a, 0x31, 0x40, 0x40, 0x3a, 0x43, 0x3d, 0x46, + 0x3c, 0x3e, 0x3e, 0x33, 0x3f, 0x41, 0x4d, 0x37, 0x39, 0x39, 0x3e, 0x3b, + 0x40, 0x39, 0x53, 0x2d, 0x46, 0x3c, 0x32, 0x42, 0x3d, 0x40, 0x40, 0x4d, + 0x2e, 0x34, 0x39, 0x3b, 0x46, 0x3b, 0x42, 0x4f, 0x3d, 0x39, 0x4e, 0x36, + 0x1a, 0x31, 0x0e, 0x56, 0x36, 0x42, 0x38, 0x44, 0x36, 0x3a, 0x20, 0x30, + 0x36, 0x34, 0x37, 0x38, 0x40, 0x41, 0x2a, 0x35, 0x3b, 0x3b, 0x3a, 0x38, + 0x33, 0x39, 0x36, 0x41, 0x43, 0x39, 0x35, 0x3d, 0x37, 0x3d, 0x33, 0x31, + 0x45, 0x33, 0x3f, 0x3b, 0x44, 0x38, 0x39, 0x34, 0x38, 0x39, 0x38, 0x3d, + 0x3a, 0x3a, 0x41, 0x40, 0x44, 0x3e, 0x3f, 0x45, 0x34, 0x31, 0x34, 0x43, + 0x3b, 0x34, 0x42, 0x3c, 0x3c, 0x43, 0x35, 0x45, 0x36, 0x38, 0x3d, 0x3c, + 0x3f, 0x3d, 0x3e, 0x45, 0x41, 0x43, 0x35, 0x3f, 0x40, 0x3f, 0x3a, 0x34, + 0x3d, 0x32, 0x41, 0x3d, 0x48, 0x42, 0x37, 0x2a, 0x3c, 0x3a, 0x3e, 0x49, + 0x38, 0x36, 0x38, 0x2e, 0x36, 0x37, 0x34, 0x3e, 0x3c, 0x43, 0x43, 0x39, + 0x39, 0x3b, 0x44, 0x46, 0x44, 0x43, 0x37, 0x46, 0x43, 0x34, 0x3b, 0x35, + 0x42, 0x41, 0x3f, 0x3d, 0x3d, 0x3a, 0x42, 0x3e, 0x38, 0x47, 0x3d, 0x49, + 0x45, 0x49, 0x3a, 0x3c, 0x3e, 0x37, 0x40, 0x46, 0x41, 0x33, 0x45, 0x36, + 0x37, 0x44, 0x49, 0x3b, 0x44, 0x40, 0x33, 0x46, 0x37, 0x39, 0x4e, 0x3a, + 0x43, 0x38, 0x3a, 0x42, 0x3a, 0x3d, 0x45, 0x50, 0x26, 0x34, 0x3b, 0x3c, + 0x46, 
0x46, 0x4c, 0x54, 0x3f, 0x35, 0x4e, 0x47, 0x21, 0x39, 0x0e, 0x54, + 0x3a, 0x3a, 0x2f, 0x40, 0x2d, 0x3a, 0x1f, 0x31, 0x31, 0x42, 0x34, 0x45, + 0x37, 0x36, 0x30, 0x3b, 0x3a, 0x3a, 0x36, 0x40, 0x32, 0x36, 0x3c, 0x3c, + 0x37, 0x42, 0x35, 0x3e, 0x39, 0x47, 0x36, 0x32, 0x41, 0x30, 0x42, 0x39, + 0x39, 0x44, 0x37, 0x30, 0x41, 0x3b, 0x3d, 0x3d, 0x43, 0x3b, 0x38, 0x45, + 0x3b, 0x3a, 0x39, 0x3a, 0x31, 0x33, 0x43, 0x46, 0x3f, 0x41, 0x44, 0x3f, + 0x3b, 0x44, 0x3a, 0x4c, 0x33, 0x33, 0x33, 0x3e, 0x37, 0x3e, 0x45, 0x45, + 0x36, 0x42, 0x3e, 0x43, 0x40, 0x34, 0x36, 0x31, 0x38, 0x34, 0x41, 0x3b, + 0x32, 0x38, 0x3e, 0x29, 0x47, 0x33, 0x37, 0x45, 0x3c, 0x3d, 0x43, 0x2c, + 0x36, 0x3a, 0x3c, 0x40, 0x3d, 0x46, 0x3c, 0x37, 0x40, 0x44, 0x37, 0x38, + 0x3e, 0x41, 0x3c, 0x40, 0x33, 0x3f, 0x44, 0x32, 0x44, 0x3a, 0x43, 0x42, + 0x3e, 0x38, 0x44, 0x3b, 0x41, 0x48, 0x3f, 0x4e, 0x3f, 0x44, 0x35, 0x45, + 0x34, 0x3f, 0x42, 0x4b, 0x37, 0x37, 0x3e, 0x45, 0x46, 0x45, 0x46, 0x3d, + 0x3e, 0x39, 0x3b, 0x3a, 0x46, 0x3a, 0x56, 0x35, 0x46, 0x3d, 0x40, 0x3b, + 0x36, 0x39, 0x3f, 0x54, 0x27, 0x2b, 0x34, 0x3c, 0x48, 0x3d, 0x49, 0x4c, + 0x3e, 0x3d, 0x4e, 0x42, 0x25, 0x3b, 0x10, 0x4d, 0x30, 0x36, 0x3e, 0x36, + 0x2e, 0x31, 0x1d, 0x37, 0x3a, 0x39, 0x33, 0x3f, 0x39, 0x38, 0x2e, 0x36, + 0x44, 0x3e, 0x41, 0x37, 0x3b, 0x30, 0x3b, 0x48, 0x31, 0x39, 0x41, 0x3e, + 0x37, 0x37, 0x34, 0x2f, 0x35, 0x3b, 0x3a, 0x3e, 0x45, 0x3e, 0x3f, 0x35, + 0x39, 0x39, 0x3b, 0x44, 0x43, 0x3c, 0x3e, 0x46, 0x40, 0x3a, 0x36, 0x45, + 0x41, 0x40, 0x36, 0x44, 0x3a, 0x37, 0x47, 0x47, 0x3d, 0x36, 0x43, 0x4e, + 0x3b, 0x38, 0x40, 0x48, 0x44, 0x43, 0x45, 0x3f, 0x43, 0x3c, 0x3b, 0x37, + 0x43, 0x41, 0x39, 0x2f, 0x3d, 0x45, 0x3e, 0x3e, 0x42, 0x40, 0x41, 0x2f, + 0x47, 0x38, 0x3a, 0x48, 0x3e, 0x35, 0x37, 0x2a, 0x34, 0x38, 0x41, 0x3b, + 0x3d, 0x37, 0x3b, 0x35, 0x38, 0x3e, 0x41, 0x3c, 0x41, 0x43, 0x3d, 0x46, + 0x47, 0x47, 0x3d, 0x35, 0x48, 0x41, 0x3d, 0x3e, 0x34, 0x47, 0x38, 0x38, + 0x39, 0x3e, 0x38, 0x4d, 0x43, 0x36, 0x42, 0x40, 0x3e, 0x41, 0x3f, 0x4c, + 0x3e, 
0x3e, 0x37, 0x44, 0x3e, 0x3b, 0x47, 0x3e, 0x3f, 0x3b, 0x39, 0x3c, + 0x3c, 0x3c, 0x53, 0x3b, 0x3b, 0x32, 0x3e, 0x3f, 0x32, 0x3c, 0x37, 0x4b, + 0x33, 0x30, 0x2f, 0x41, 0x47, 0x42, 0x49, 0x4f, 0x3b, 0x42, 0x4c, 0x44, + 0x1f, 0x37, 0x16, 0x4e, 0x3b, 0x3f, 0x30, 0x36, 0x35, 0x38, 0x26, 0x36, + 0x32, 0x3b, 0x38, 0x3c, 0x30, 0x3e, 0x34, 0x3e, 0x3d, 0x34, 0x39, 0x3c, + 0x36, 0x47, 0x34, 0x41, 0x31, 0x39, 0x44, 0x3e, 0x39, 0x41, 0x32, 0x36, + 0x3b, 0x3f, 0x32, 0x3d, 0x36, 0x3e, 0x40, 0x3d, 0x45, 0x32, 0x45, 0x42, + 0x38, 0x43, 0x40, 0x42, 0x34, 0x3a, 0x43, 0x38, 0x47, 0x3f, 0x41, 0x47, + 0x34, 0x44, 0x41, 0x39, 0x3c, 0x46, 0x36, 0x4f, 0x41, 0x3e, 0x38, 0x38, + 0x3a, 0x3b, 0x43, 0x44, 0x37, 0x3f, 0x35, 0x43, 0x34, 0x3d, 0x40, 0x32, + 0x3a, 0x3b, 0x3d, 0x34, 0x35, 0x43, 0x31, 0x2c, 0x3b, 0x36, 0x38, 0x41, + 0x3c, 0x38, 0x3d, 0x31, 0x45, 0x46, 0x42, 0x41, 0x33, 0x3f, 0x3f, 0x3a, + 0x36, 0x3f, 0x3c, 0x3c, 0x3c, 0x3e, 0x39, 0x3e, 0x40, 0x37, 0x47, 0x3e, + 0x35, 0x39, 0x3d, 0x3d, 0x37, 0x36, 0x3e, 0x45, 0x38, 0x3d, 0x45, 0x43, + 0x3a, 0x32, 0x3b, 0x3a, 0x32, 0x3c, 0x3d, 0x43, 0x3d, 0x33, 0x3b, 0x3d, + 0x46, 0x3a, 0x44, 0x45, 0x3b, 0x3e, 0x3c, 0x42, 0x37, 0x37, 0x52, 0x2a, + 0x3a, 0x35, 0x35, 0x3f, 0x40, 0x38, 0x40, 0x5b, 0x35, 0x32, 0x2b, 0x3d, + 0x4a, 0x3c, 0x46, 0x56, 0x44, 0x30, 0x4d, 0x39, 0x20, 0x32, 0x0f, 0x4f, + 0x33, 0x3c, 0x35, 0x35, 0x3a, 0x45, 0x29, 0x3b, 0x31, 0x38, 0x34, 0x38, + 0x42, 0x45, 0x37, 0x3e, 0x37, 0x2e, 0x36, 0x43, 0x3f, 0x38, 0x2f, 0x41, + 0x3f, 0x41, 0x3c, 0x31, 0x37, 0x36, 0x37, 0x39, 0x41, 0x3a, 0x3a, 0x40, + 0x3e, 0x47, 0x3d, 0x37, 0x3c, 0x38, 0x35, 0x39, 0x3a, 0x43, 0x3f, 0x42, + 0x42, 0x38, 0x3e, 0x40, 0x3c, 0x3a, 0x45, 0x48, 0x37, 0x3a, 0x3e, 0x35, + 0x3a, 0x3d, 0x45, 0x4a, 0x3d, 0x37, 0x38, 0x3a, 0x3d, 0x46, 0x46, 0x41, + 0x37, 0x41, 0x40, 0x48, 0x37, 0x34, 0x3b, 0x2c, 0x39, 0x34, 0x37, 0x35, + 0x3a, 0x43, 0x39, 0x2e, 0x39, 0x3f, 0x40, 0x3e, 0x40, 0x40, 0x3c, 0x2d, + 0x3e, 0x3c, 0x37, 0x39, 0x3c, 0x3b, 0x3d, 0x3f, 0x41, 0x48, 0x3b, 0x3d, + 0x3b, 
0x41, 0x45, 0x3e, 0x3a, 0x38, 0x3f, 0x3c, 0x3d, 0x3e, 0x40, 0x42, + 0x46, 0x38, 0x43, 0x34, 0x35, 0x47, 0x3d, 0x46, 0x3f, 0x3e, 0x32, 0x3f, + 0x3e, 0x3d, 0x47, 0x46, 0x38, 0x41, 0x45, 0x3f, 0x34, 0x3f, 0x41, 0x43, + 0x3e, 0x3e, 0x44, 0x3b, 0x3b, 0x36, 0x51, 0x32, 0x37, 0x3c, 0x42, 0x43, + 0x33, 0x39, 0x42, 0x61, 0x2c, 0x3b, 0x2e, 0x39, 0x42, 0x39, 0x42, 0x54, + 0x3c, 0x3a, 0x48, 0x35, 0x26, 0x34, 0x15, 0x51, 0x35, 0x40, 0x36, 0x3c, + 0x2d, 0x37, 0x25, 0x38, 0x33, 0x3d, 0x3d, 0x39, 0x3e, 0x3b, 0x2e, 0x4b, + 0x3d, 0x3b, 0x42, 0x37, 0x37, 0x40, 0x37, 0x40, 0x35, 0x45, 0x37, 0x37, + 0x3f, 0x41, 0x36, 0x39, 0x3c, 0x32, 0x3e, 0x38, 0x41, 0x40, 0x3e, 0x3f, + 0x3b, 0x3c, 0x43, 0x35, 0x3e, 0x3d, 0x44, 0x44, 0x3a, 0x36, 0x39, 0x3f, + 0x3a, 0x31, 0x42, 0x4d, 0x40, 0x33, 0x40, 0x45, 0x44, 0x3d, 0x40, 0x49, + 0x41, 0x3f, 0x42, 0x3a, 0x34, 0x46, 0x38, 0x46, 0x42, 0x34, 0x3a, 0x40, + 0x40, 0x41, 0x3d, 0x32, 0x35, 0x48, 0x35, 0x3e, 0x44, 0x41, 0x40, 0x2c, + 0x46, 0x38, 0x38, 0x3f, 0x36, 0x40, 0x38, 0x2a, 0x43, 0x41, 0x3e, 0x35, + 0x46, 0x3a, 0x45, 0x46, 0x46, 0x42, 0x3a, 0x3b, 0x40, 0x38, 0x35, 0x43, + 0x38, 0x3d, 0x3b, 0x41, 0x36, 0x44, 0x3f, 0x3f, 0x34, 0x3e, 0x3c, 0x3d, + 0x49, 0x36, 0x37, 0x4b, 0x38, 0x3c, 0x43, 0x37, 0x3a, 0x3f, 0x31, 0x45, + 0x3b, 0x39, 0x3f, 0x40, 0x37, 0x3c, 0x42, 0x3f, 0x3c, 0x33, 0x40, 0x3b, + 0x32, 0x3c, 0x52, 0x31, 0x3d, 0x44, 0x3b, 0x31, 0x46, 0x38, 0x40, 0x60, + 0x2b, 0x3c, 0x37, 0x34, 0x43, 0x38, 0x45, 0x57, 0x37, 0x39, 0x49, 0x33, + 0x2d, 0x3f, 0x18, 0x4e, 0x39, 0x39, 0x32, 0x3b, 0x34, 0x3b, 0x2c, 0x45, + 0x33, 0x37, 0x45, 0x42, 0x3d, 0x37, 0x2a, 0x4c, 0x3d, 0x3f, 0x3c, 0x36, + 0x37, 0x3c, 0x39, 0x47, 0x3d, 0x44, 0x3d, 0x40, 0x3d, 0x41, 0x34, 0x3e, + 0x40, 0x34, 0x3b, 0x3a, 0x41, 0x36, 0x37, 0x40, 0x3e, 0x3f, 0x3a, 0x36, + 0x3e, 0x35, 0x3b, 0x48, 0x41, 0x40, 0x3c, 0x42, 0x34, 0x41, 0x3f, 0x44, + 0x34, 0x39, 0x33, 0x39, 0x39, 0x47, 0x40, 0x48, 0x38, 0x3a, 0x43, 0x43, + 0x48, 0x3a, 0x3f, 0x46, 0x35, 0x3a, 0x33, 0x36, 0x32, 0x3c, 0x40, 0x34, + 0x40, 
0x3a, 0x42, 0x3a, 0x39, 0x38, 0x41, 0x35, 0x3a, 0x3f, 0x35, 0x40, + 0x3f, 0x39, 0x39, 0x36, 0x38, 0x40, 0x3e, 0x3e, 0x3a, 0x31, 0x32, 0x44, + 0x40, 0x47, 0x3a, 0x3c, 0x43, 0x43, 0x46, 0x48, 0x40, 0x35, 0x3d, 0x37, + 0x44, 0x37, 0x33, 0x44, 0x3b, 0x3e, 0x3f, 0x37, 0x36, 0x3a, 0x38, 0x47, + 0x3a, 0x44, 0x36, 0x42, 0x3e, 0x44, 0x34, 0x46, 0x33, 0x43, 0x44, 0x3e, + 0x30, 0x48, 0x37, 0x38, 0x33, 0x3c, 0x46, 0x42, 0x38, 0x3d, 0x50, 0x39, + 0x33, 0x38, 0x3e, 0x40, 0x3b, 0x2b, 0x3b, 0x5f, 0x2b, 0x32, 0x2f, 0x37, + 0x3f, 0x3a, 0x40, 0x4e, 0x34, 0x38, 0x47, 0x37, 0x27, 0x2b, 0x1b, 0x4f, + 0x36, 0x38, 0x3a, 0x3a, 0x3b, 0x38, 0x2e, 0x3f, 0x3f, 0x42, 0x42, 0x42, + 0x36, 0x3e, 0x3c, 0x55, 0x39, 0x40, 0x44, 0x43, 0x3e, 0x33, 0x3c, 0x43, + 0x38, 0x44, 0x3b, 0x46, 0x3f, 0x45, 0x34, 0x38, 0x3c, 0x41, 0x42, 0x3d, + 0x42, 0x36, 0x43, 0x3f, 0x3c, 0x39, 0x3e, 0x39, 0x39, 0x42, 0x33, 0x47, + 0x36, 0x3d, 0x3f, 0x3b, 0x40, 0x39, 0x3b, 0x49, 0x36, 0x40, 0x3d, 0x41, + 0x40, 0x34, 0x3b, 0x4e, 0x3b, 0x36, 0x3b, 0x45, 0x40, 0x32, 0x3b, 0x49, + 0x37, 0x38, 0x3a, 0x47, 0x37, 0x40, 0x3e, 0x38, 0x40, 0x3f, 0x3c, 0x3a, + 0x47, 0x41, 0x42, 0x30, 0x40, 0x3c, 0x42, 0x3f, 0x31, 0x44, 0x39, 0x38, + 0x3b, 0x38, 0x42, 0x43, 0x41, 0x35, 0x3a, 0x39, 0x3e, 0x38, 0x39, 0x3e, + 0x3c, 0x42, 0x3d, 0x49, 0x47, 0x3c, 0x3f, 0x35, 0x41, 0x3a, 0x36, 0x43, + 0x43, 0x3b, 0x39, 0x3b, 0x36, 0x43, 0x43, 0x4e, 0x3e, 0x35, 0x37, 0x3b, + 0x3f, 0x37, 0x41, 0x48, 0x32, 0x44, 0x43, 0x32, 0x38, 0x39, 0x45, 0x39, + 0x3e, 0x3d, 0x35, 0x39, 0x35, 0x39, 0x50, 0x37, 0x39, 0x40, 0x43, 0x47, + 0x32, 0x2a, 0x40, 0x62, 0x24, 0x30, 0x36, 0x3e, 0x41, 0x32, 0x47, 0x58, + 0x39, 0x36, 0x44, 0x34, 0x26, 0x34, 0x1e, 0x50, 0x3c, 0x3b, 0x3f, 0x42, + 0x35, 0x3d, 0x2a, 0x4e, 0x40, 0x38, 0x36, 0x31, 0x3a, 0x30, 0x37, 0x4b, + 0x3c, 0x3b, 0x3b, 0x41, 0x3b, 0x3c, 0x2e, 0x45, 0x44, 0x3f, 0x3b, 0x35, + 0x3e, 0x33, 0x37, 0x3d, 0x40, 0x39, 0x39, 0x37, 0x40, 0x3e, 0x3a, 0x3e, + 0x3c, 0x3c, 0x45, 0x40, 0x3c, 0x3f, 0x3a, 0x51, 0x47, 0x3a, 0x34, 0x39, + 0x3b, 
0x34, 0x44, 0x4c, 0x36, 0x3d, 0x3a, 0x35, 0x34, 0x36, 0x38, 0x4b, + 0x3f, 0x40, 0x3f, 0x3e, 0x40, 0x41, 0x47, 0x43, 0x32, 0x38, 0x46, 0x44, + 0x46, 0x43, 0x43, 0x37, 0x39, 0x49, 0x37, 0x36, 0x3e, 0x3d, 0x37, 0x3c, + 0x39, 0x37, 0x34, 0x43, 0x45, 0x32, 0x3a, 0x3a, 0x38, 0x43, 0x3b, 0x40, + 0x3b, 0x3f, 0x3d, 0x41, 0x40, 0x3d, 0x3a, 0x3b, 0x48, 0x37, 0x3d, 0x41, + 0x40, 0x3e, 0x38, 0x41, 0x3d, 0x3a, 0x38, 0x49, 0x40, 0x3c, 0x42, 0x41, + 0x3a, 0x38, 0x38, 0x4c, 0x3e, 0x41, 0x40, 0x3b, 0x3d, 0x3e, 0x3c, 0x46, + 0x3e, 0x42, 0x41, 0x38, 0x42, 0x42, 0x41, 0x3e, 0x3e, 0x37, 0x3c, 0x43, + 0x43, 0x3b, 0x54, 0x2b, 0x45, 0x3b, 0x43, 0x41, 0x41, 0x26, 0x3f, 0x60, + 0x25, 0x2b, 0x2e, 0x3a, 0x40, 0x31, 0x40, 0x49, 0x40, 0x31, 0x46, 0x3c, + 0x1e, 0x2a, 0x1a, 0x47, 0x33, 0x37, 0x37, 0x34, 0x31, 0x36, 0x25, 0x41, + 0x2e, 0x36, 0x35, 0x33, 0x33, 0x34, 0x31, 0x45, 0x3a, 0x3f, 0x3d, 0x40, + 0x3c, 0x41, 0x30, 0x3c, 0x3f, 0x46, 0x37, 0x3c, 0x3a, 0x3c, 0x36, 0x3a, + 0x47, 0x3d, 0x31, 0x3f, 0x40, 0x3e, 0x36, 0x44, 0x41, 0x3d, 0x36, 0x3f, + 0x37, 0x3f, 0x34, 0x4b, 0x31, 0x47, 0x43, 0x3e, 0x3e, 0x3a, 0x3b, 0x4b, + 0x37, 0x32, 0x38, 0x3d, 0x37, 0x47, 0x46, 0x4d, 0x36, 0x3c, 0x3f, 0x3a, + 0x41, 0x31, 0x47, 0x43, 0x3d, 0x3d, 0x3e, 0x35, 0x3d, 0x46, 0x49, 0x2a, + 0x37, 0x3c, 0x39, 0x3d, 0x47, 0x3c, 0x34, 0x2c, 0x3e, 0x38, 0x47, 0x32, + 0x36, 0x36, 0x41, 0x38, 0x35, 0x44, 0x48, 0x3b, 0x39, 0x3e, 0x38, 0x3e, + 0x40, 0x36, 0x37, 0x46, 0x39, 0x3b, 0x34, 0x45, 0x40, 0x3b, 0x48, 0x36, + 0x34, 0x44, 0x37, 0x46, 0x3f, 0x42, 0x33, 0x36, 0x43, 0x3c, 0x41, 0x46, + 0x31, 0x42, 0x43, 0x44, 0x44, 0x3e, 0x42, 0x3b, 0x3b, 0x3a, 0x3c, 0x37, + 0x42, 0x41, 0x46, 0x38, 0x41, 0x3b, 0x40, 0x44, 0x37, 0x3c, 0x4c, 0x2e, + 0x3a, 0x3e, 0x3b, 0x36, 0x33, 0x27, 0x37, 0x5d, 0x27, 0x34, 0x32, 0x41, + 0x41, 0x3f, 0x40, 0x5d, 0x40, 0x3d, 0x48, 0x39, 0x2e, 0x30, 0x1f, 0x3f, + 0x38, 0x3f, 0x40, 0x33, 0x40, 0x38, 0x31, 0x3f, 0x42, 0x3e, 0x3b, 0x3a, + 0x42, 0x36, 0x3a, 0x42, 0x3c, 0x3b, 0x3d, 0x41, 0x3d, 0x40, 0x40, 0x3e, + 0x36, 
0x41, 0x47, 0x3d, 0x33, 0x32, 0x33, 0x44, 0x3e, 0x3a, 0x3e, 0x3d, + 0x45, 0x3f, 0x38, 0x3f, 0x40, 0x3a, 0x3c, 0x46, 0x32, 0x42, 0x3c, 0x51, + 0x33, 0x38, 0x3a, 0x38, 0x41, 0x34, 0x45, 0x4e, 0x35, 0x3c, 0x42, 0x3e, + 0x3f, 0x45, 0x44, 0x4e, 0x39, 0x47, 0x3a, 0x33, 0x3e, 0x3b, 0x45, 0x42, + 0x37, 0x3a, 0x3e, 0x33, 0x41, 0x48, 0x32, 0x2a, 0x3b, 0x37, 0x3f, 0x3d, + 0x3a, 0x42, 0x41, 0x2f, 0x34, 0x3e, 0x49, 0x3b, 0x38, 0x3e, 0x3d, 0x3a, + 0x37, 0x3c, 0x44, 0x41, 0x39, 0x42, 0x3f, 0x39, 0x40, 0x35, 0x3d, 0x41, + 0x3b, 0x45, 0x44, 0x48, 0x3d, 0x42, 0x36, 0x33, 0x3e, 0x44, 0x3f, 0x41, + 0x42, 0x40, 0x49, 0x34, 0x48, 0x41, 0x3f, 0x40, 0x3c, 0x45, 0x47, 0x34, + 0x41, 0x37, 0x47, 0x3e, 0x41, 0x41, 0x39, 0x42, 0x3f, 0x3a, 0x46, 0x33, + 0x39, 0x41, 0x38, 0x38, 0x3e, 0x42, 0x41, 0x38, 0x35, 0x32, 0x33, 0x38, + 0x3a, 0x3f, 0x45, 0x66, 0x33, 0x47, 0x38, 0x3c, 0x41, 0x2f, 0x48, 0x55, + 0x33, 0x3e, 0x49, 0x3b, 0x3c, 0x30, 0x24, 0x45, 0x3c, 0x44, 0x43, 0x32, + 0x3d, 0x3f, 0x35, 0x3b, 0x3e, 0x36, 0x38, 0x3a, 0x36, 0x37, 0x3b, 0x41, + 0x38, 0x42, 0x3e, 0x43, 0x39, 0x3f, 0x3c, 0x40, 0x37, 0x43, 0x3e, 0x3b, + 0x3d, 0x35, 0x35, 0x3d, 0x43, 0x3f, 0x3a, 0x35, 0x37, 0x3c, 0x31, 0x47, + 0x44, 0x45, 0x40, 0x32, 0x44, 0x36, 0x38, 0x51, 0x3c, 0x41, 0x45, 0x37, + 0x39, 0x44, 0x3e, 0x4f, 0x3c, 0x3a, 0x38, 0x40, 0x3f, 0x34, 0x39, 0x4e, + 0x3d, 0x39, 0x45, 0x3f, 0x3e, 0x3c, 0x3b, 0x42, 0x3b, 0x3b, 0x34, 0x3d, + 0x41, 0x44, 0x39, 0x2e, 0x37, 0x44, 0x45, 0x37, 0x3d, 0x41, 0x3f, 0x33, + 0x3f, 0x3e, 0x3e, 0x40, 0x44, 0x3f, 0x37, 0x32, 0x35, 0x3e, 0x43, 0x41, + 0x39, 0x37, 0x35, 0x3f, 0x48, 0x3d, 0x43, 0x49, 0x38, 0x35, 0x3f, 0x48, + 0x3b, 0x3a, 0x34, 0x3f, 0x3c, 0x44, 0x3a, 0x40, 0x36, 0x35, 0x44, 0x36, + 0x44, 0x3b, 0x3d, 0x38, 0x3c, 0x44, 0x47, 0x3a, 0x3b, 0x45, 0x41, 0x3a, + 0x39, 0x35, 0x44, 0x3a, 0x49, 0x36, 0x48, 0x31, 0x42, 0x43, 0x42, 0x34, + 0x41, 0x40, 0x4d, 0x36, 0x3e, 0x35, 0x39, 0x3b, 0x3f, 0x41, 0x38, 0x39, + 0x3c, 0x44, 0x3f, 0x39, 0x3a, 0x36, 0x3d, 0x36, 0x3a, 0x3a, 0x34, 0x3b, + 0x38, 
0x2f, 0x40, 0x34, 0x32, 0x4d, 0x43, 0x45, 0x4e, 0x3f, 0x48, 0x35, + 0x3b, 0x4d, 0x4f, 0x39, 0x42, 0x36, 0x46, 0x36, 0x4a, 0x3c, 0x37, 0x41, + 0x40, 0x43, 0x50, 0x36, 0x3e, 0x39, 0x44, 0x40, 0x36, 0x47, 0x3f, 0x36, + 0x45, 0x40, 0x45, 0x41, 0x3b, 0x37, 0x41, 0x39, 0x3b, 0x48, 0x37, 0x34, + 0x41, 0x45, 0x49, 0x3f, 0x39, 0x49, 0x3f, 0x3a, 0x42, 0x34, 0x38, 0x37, + 0x44, 0x34, 0x3c, 0x3d, 0x40, 0x47, 0x3a, 0x36, 0x3f, 0x3c, 0x41, 0x3e, + 0x47, 0x46, 0x46, 0x43, 0x3f, 0x38, 0x3b, 0x40, 0x3f, 0x48, 0x3b, 0x4c, + 0x3d, 0x4b, 0x34, 0x3b, 0x44, 0x43, 0x3c, 0x49, 0x38, 0x42, 0x41, 0x36, + 0x33, 0x36, 0x40, 0x46, 0x40, 0x3a, 0x42, 0x3c, 0x3d, 0x35, 0x3c, 0x52, + 0x3e, 0x40, 0x43, 0x43, 0x41, 0x3b, 0x3e, 0x44, 0x3f, 0x40, 0x40, 0x43, + 0x3d, 0x3f, 0x36, 0x42, 0x3f, 0x3c, 0x34, 0x3d, 0x33, 0x41, 0x3c, 0x39, + 0x34, 0x43, 0x3f, 0x34, 0x3c, 0x3a, 0x3a, 0x37, 0x42, 0x41, 0x40, 0x3e, + 0x3d, 0x3c, 0x41, 0x3c, 0x38, 0x33, 0x49, 0x46, 0x40, 0x40, 0x3a, 0x46, + 0x38, 0x3c, 0x37, 0x34, 0x3e, 0x3d, 0x32, 0x38, 0x3c, 0x4c, 0x3a, 0x34, + 0x35, 0x32, 0x39, 0x40, 0x3a, 0x58, 0x40, 0x46, 0x42, 0x33, 0x45, 0x39, + 0x34, 0x4f, 0x53, 0x45, 0x43, 0x3e, 0x41, 0x36, 0x3e, 0x3f, 0x40, 0x47, + 0x4e, 0x3d, 0x53, 0x2b, 0x41, 0x36, 0x3e, 0x38, 0x47, 0x41, 0x3f, 0x34, + 0x47, 0x40, 0x38, 0x39, 0x3d, 0x42, 0x3f, 0x3c, 0x48, 0x3a, 0x35, 0x3c, + 0x45, 0x49, 0x3c, 0x33, 0x33, 0x3f, 0x3c, 0x46, 0x43, 0x3f, 0x45, 0x31, + 0x35, 0x43, 0x46, 0x3a, 0x45, 0x3c, 0x37, 0x3a, 0x37, 0x36, 0x35, 0x3f, + 0x38, 0x49, 0x34, 0x3f, 0x3c, 0x42, 0x49, 0x3e, 0x3e, 0x3c, 0x39, 0x49, + 0x3e, 0x3c, 0x3b, 0x43, 0x44, 0x45, 0x39, 0x4b, 0x47, 0x47, 0x3e, 0x33, + 0x3c, 0x31, 0x34, 0x4f, 0x45, 0x43, 0x40, 0x3d, 0x42, 0x3b, 0x43, 0x50, + 0x3c, 0x3b, 0x37, 0x42, 0x47, 0x42, 0x3e, 0x4a, 0x3f, 0x3a, 0x48, 0x3d, + 0x48, 0x45, 0x3e, 0x40, 0x3a, 0x3c, 0x3d, 0x39, 0x41, 0x42, 0x3c, 0x42, + 0x43, 0x3c, 0x3b, 0x3d, 0x47, 0x49, 0x38, 0x3c, 0x46, 0x3a, 0x3c, 0x3f, + 0x3a, 0x46, 0x3a, 0x3b, 0x3d, 0x3a, 0x49, 0x46, 0x38, 0x40, 0x3e, 0x38, + 0x37, 
0x32, 0x40, 0x3c, 0x42, 0x3d, 0x3b, 0x40, 0x3a, 0x38, 0x49, 0x33, + 0x40, 0x38, 0x2b, 0x3a, 0x3c, 0x4f, 0x4d, 0x3e, 0x35, 0x3d, 0x3b, 0x40, + 0x3a, 0x54, 0x3e, 0x3e, 0x43, 0x30, 0x47, 0x3d, 0x3b, 0x53, 0x52, 0x4a, + 0x43, 0x41, 0x49, 0x37, 0x3b, 0x35, 0x44, 0x3c, 0x45, 0x40, 0x4f, 0x36, + 0x4b, 0x42, 0x41, 0x3a, 0x41, 0x44, 0x47, 0x32, 0x43, 0x35, 0x3f, 0x37, + 0x43, 0x41, 0x43, 0x36, 0x3f, 0x3b, 0x3d, 0x38, 0x3d, 0x40, 0x42, 0x36, + 0x44, 0x3a, 0x39, 0x47, 0x37, 0x34, 0x42, 0x3a, 0x37, 0x38, 0x37, 0x3f, + 0x36, 0x3b, 0x45, 0x3f, 0x3f, 0x3d, 0x39, 0x3d, 0x39, 0x41, 0x37, 0x3f, + 0x3f, 0x3d, 0x3f, 0x41, 0x43, 0x41, 0x45, 0x43, 0x41, 0x3c, 0x3e, 0x40, + 0x40, 0x39, 0x41, 0x4f, 0x47, 0x42, 0x46, 0x48, 0x3b, 0x3b, 0x3c, 0x46, + 0x47, 0x3e, 0x46, 0x37, 0x38, 0x3d, 0x38, 0x52, 0x36, 0x46, 0x3c, 0x3a, + 0x3b, 0x37, 0x48, 0x4b, 0x3f, 0x42, 0x3c, 0x36, 0x40, 0x37, 0x33, 0x4c, + 0x39, 0x34, 0x41, 0x34, 0x3f, 0x3b, 0x35, 0x4b, 0x3b, 0x45, 0x43, 0x31, + 0x3e, 0x39, 0x30, 0x3d, 0x32, 0x43, 0x44, 0x3c, 0x3e, 0x38, 0x43, 0x41, + 0x3e, 0x37, 0x41, 0x39, 0x39, 0x44, 0x43, 0x38, 0x3f, 0x37, 0x48, 0x3f, + 0x3b, 0x44, 0x37, 0x3f, 0x3a, 0x3f, 0x3b, 0x33, 0x42, 0x3e, 0x2f, 0x42, + 0x44, 0x4f, 0x52, 0x3c, 0x34, 0x33, 0x39, 0x46, 0x31, 0x55, 0x43, 0x4e, + 0x49, 0x38, 0x4d, 0x48, 0x34, 0x4d, 0x5c, 0x4d, 0x49, 0x37, 0x4f, 0x40, + 0x3c, 0x3d, 0x41, 0x42, 0x3f, 0x51, 0x4b, 0x2f, 0x46, 0x35, 0x39, 0x3c, + 0x49, 0x3d, 0x4e, 0x32, 0x43, 0x47, 0x31, 0x3e, 0x42, 0x4a, 0x4c, 0x39, + 0x43, 0x46, 0x3e, 0x3f, 0x44, 0x3c, 0x42, 0x30, 0x3e, 0x34, 0x3b, 0x3b, + 0x3a, 0x3c, 0x42, 0x3d, 0x3d, 0x48, 0x48, 0x36, 0x3a, 0x45, 0x38, 0x40, + 0x3c, 0x41, 0x3f, 0x49, 0x42, 0x41, 0x38, 0x3d, 0x3d, 0x44, 0x3b, 0x3d, + 0x35, 0x48, 0x43, 0x3b, 0x32, 0x41, 0x3e, 0x3a, 0x46, 0x41, 0x40, 0x54, + 0x38, 0x3f, 0x3c, 0x36, 0x3b, 0x36, 0x43, 0x50, 0x38, 0x3c, 0x44, 0x3b, + 0x43, 0x47, 0x32, 0x50, 0x3d, 0x46, 0x3d, 0x3b, 0x39, 0x37, 0x3b, 0x4a, + 0x47, 0x43, 0x46, 0x3d, 0x3d, 0x41, 0x43, 0x45, 0x3b, 0x3c, 0x39, 0x47, + 0x43, 
0x42, 0x39, 0x4c, 0x34, 0x41, 0x45, 0x3b, 0x38, 0x3e, 0x37, 0x3f, + 0x45, 0x43, 0x39, 0x42, 0x3c, 0x3d, 0x3d, 0x3c, 0x48, 0x39, 0x3b, 0x3a, + 0x46, 0x45, 0x3d, 0x3a, 0x3f, 0x3a, 0x45, 0x36, 0x3d, 0x43, 0x36, 0x43, + 0x42, 0x3d, 0x41, 0x3f, 0x3a, 0x3f, 0x31, 0x37, 0x48, 0x4f, 0x4e, 0x36, + 0x30, 0x3a, 0x3e, 0x3e, 0x38, 0x57, 0x40, 0x47, 0x47, 0x38, 0x4f, 0x46, + 0x3d, 0x4a, 0x50, 0x4c, 0x42, 0x3b, 0x4d, 0x3d, 0x3d, 0x33, 0x40, 0x41, + 0x48, 0x4b, 0x46, 0x39, 0x4d, 0x30, 0x45, 0x38, 0x48, 0x3c, 0x48, 0x3b, + 0x4d, 0x40, 0x3b, 0x40, 0x46, 0x41, 0x51, 0x34, 0x40, 0x43, 0x3f, 0x42, + 0x45, 0x42, 0x3e, 0x35, 0x3d, 0x38, 0x37, 0x3a, 0x42, 0x40, 0x43, 0x3c, + 0x3c, 0x3d, 0x43, 0x40, 0x45, 0x3a, 0x3e, 0x3a, 0x3e, 0x40, 0x43, 0x35, + 0x37, 0x3f, 0x3f, 0x3e, 0x39, 0x3f, 0x47, 0x38, 0x3e, 0x44, 0x3b, 0x3c, + 0x3b, 0x32, 0x40, 0x3e, 0x42, 0x45, 0x3a, 0x52, 0x3a, 0x3e, 0x45, 0x40, + 0x41, 0x48, 0x3f, 0x4e, 0x3e, 0x42, 0x3d, 0x39, 0x3a, 0x33, 0x3f, 0x4b, + 0x3e, 0x38, 0x36, 0x3e, 0x31, 0x41, 0x3a, 0x40, 0x3b, 0x37, 0x3f, 0x3e, + 0x3e, 0x3f, 0x35, 0x44, 0x3d, 0x42, 0x3d, 0x44, 0x42, 0x3f, 0x3e, 0x44, + 0x3e, 0x45, 0x37, 0x3a, 0x3b, 0x42, 0x3f, 0x41, 0x3b, 0x3f, 0x41, 0x41, + 0x3e, 0x34, 0x47, 0x39, 0x46, 0x46, 0x37, 0x39, 0x3f, 0x45, 0x39, 0x39, + 0x3a, 0x40, 0x38, 0x3a, 0x31, 0x34, 0x3a, 0x41, 0x38, 0x41, 0x3a, 0x41, + 0x44, 0x37, 0x2d, 0x41, 0x43, 0x4d, 0x4b, 0x3b, 0x2c, 0x30, 0x42, 0x3b, + 0x31, 0x56, 0x43, 0x47, 0x47, 0x38, 0x50, 0x44, 0x40, 0x52, 0x5a, 0x50, + 0x44, 0x3f, 0x4b, 0x35, 0x3a, 0x36, 0x41, 0x44, 0x47, 0x4e, 0x52, 0x36, + 0x45, 0x39, 0x38, 0x3c, 0x42, 0x44, 0x40, 0x3b, 0x4b, 0x38, 0x35, 0x35, + 0x3f, 0x40, 0x4f, 0x39, 0x3d, 0x37, 0x34, 0x3e, 0x41, 0x4c, 0x40, 0x37, + 0x3d, 0x3b, 0x37, 0x37, 0x40, 0x42, 0x35, 0x39, 0x41, 0x42, 0x3d, 0x34, + 0x3c, 0x37, 0x3a, 0x3d, 0x46, 0x46, 0x46, 0x3f, 0x44, 0x3d, 0x3c, 0x40, + 0x3c, 0x3a, 0x3d, 0x3b, 0x3b, 0x41, 0x47, 0x3a, 0x43, 0x43, 0x43, 0x3b, + 0x3e, 0x3e, 0x42, 0x46, 0x36, 0x37, 0x45, 0x35, 0x3c, 0x3b, 0x31, 0x4b, + 0x3c, 
0x3e, 0x3a, 0x3a, 0x42, 0x42, 0x34, 0x47, 0x37, 0x34, 0x41, 0x3d, + 0x3e, 0x39, 0x43, 0x47, 0x31, 0x3b, 0x40, 0x3b, 0x42, 0x3d, 0x44, 0x44, + 0x37, 0x39, 0x44, 0x3b, 0x40, 0x3a, 0x3d, 0x44, 0x3c, 0x40, 0x42, 0x3b, + 0x40, 0x3e, 0x32, 0x3d, 0x3c, 0x3e, 0x44, 0x3e, 0x47, 0x3d, 0x3f, 0x2e, + 0x3e, 0x3d, 0x3f, 0x3b, 0x3b, 0x43, 0x43, 0x3c, 0x3a, 0x3c, 0x3a, 0x36, + 0x38, 0x46, 0x30, 0x3e, 0x3f, 0x35, 0x3e, 0x34, 0x3c, 0x34, 0x32, 0x4a, + 0x41, 0x48, 0x48, 0x3f, 0x34, 0x37, 0x42, 0x43, 0x36, 0x59, 0x42, 0x3f, + 0x4b, 0x3d, 0x5d, 0x45, 0x3b, 0x51, 0x51, 0x4c, 0x41, 0x40, 0x4d, 0x36, + 0x3f, 0x34, 0x39, 0x3d, 0x4a, 0x4b, 0x4f, 0x33, 0x48, 0x32, 0x3c, 0x32, + 0x48, 0x4c, 0x4d, 0x3a, 0x49, 0x3a, 0x3a, 0x2e, 0x4b, 0x44, 0x4f, 0x33, + 0x3a, 0x48, 0x34, 0x43, 0x38, 0x45, 0x44, 0x35, 0x3b, 0x3f, 0x40, 0x37, + 0x35, 0x34, 0x38, 0x3e, 0x41, 0x3e, 0x3b, 0x47, 0x41, 0x47, 0x3c, 0x3c, + 0x39, 0x40, 0x3e, 0x45, 0x36, 0x41, 0x3f, 0x3f, 0x3c, 0x44, 0x3f, 0x43, + 0x3d, 0x3c, 0x49, 0x42, 0x3e, 0x3f, 0x48, 0x37, 0x43, 0x37, 0x43, 0x3d, + 0x32, 0x42, 0x44, 0x39, 0x36, 0x37, 0x40, 0x46, 0x47, 0x3d, 0x3a, 0x42, + 0x3f, 0x38, 0x37, 0x48, 0x39, 0x40, 0x3c, 0x37, 0x33, 0x38, 0x38, 0x40, + 0x41, 0x3c, 0x3f, 0x3b, 0x40, 0x3a, 0x47, 0x46, 0x3a, 0x37, 0x42, 0x47, + 0x3b, 0x3f, 0x3b, 0x40, 0x33, 0x3f, 0x3a, 0x3c, 0x38, 0x3a, 0x36, 0x38, + 0x36, 0x40, 0x48, 0x42, 0x48, 0x3c, 0x43, 0x36, 0x32, 0x3b, 0x34, 0x39, + 0x38, 0x46, 0x37, 0x3b, 0x44, 0x34, 0x36, 0x38, 0x3c, 0x43, 0x33, 0x3c, + 0x3b, 0x45, 0x38, 0x38, 0x44, 0x33, 0x36, 0x4a, 0x46, 0x4c, 0x4a, 0x34, + 0x36, 0x37, 0x43, 0x42, 0x33, 0x58, 0x43, 0x48, 0x44, 0x38, 0x5f, 0x3f, + 0x3c, 0x4d, 0x53, 0x52, 0x43, 0x47, 0x52, 0x3e, 0x3b, 0x2d, 0x3b, 0x3a, + 0x4b, 0x49, 0x53, 0x38, 0x4c, 0x2f, 0x38, 0x31, 0x42, 0x40, 0x48, 0x3f, + 0x44, 0x3c, 0x3c, 0x34, 0x46, 0x3f, 0x49, 0x3a, 0x43, 0x3d, 0x34, 0x42, + 0x36, 0x47, 0x51, 0x3c, 0x3d, 0x39, 0x39, 0x3a, 0x3b, 0x35, 0x35, 0x41, + 0x47, 0x3c, 0x3b, 0x43, 0x3f, 0x45, 0x3e, 0x40, 0x3c, 0x3f, 0x3c, 0x42, + 0x3b, 
0x3e, 0x38, 0x3f, 0x3f, 0x41, 0x39, 0x39, 0x3d, 0x43, 0x4f, 0x3d, + 0x48, 0x3b, 0x44, 0x45, 0x3d, 0x3b, 0x49, 0x43, 0x44, 0x3d, 0x37, 0x3b, + 0x3c, 0x45, 0x46, 0x44, 0x35, 0x3e, 0x32, 0x35, 0x34, 0x3b, 0x40, 0x43, + 0x3e, 0x45, 0x37, 0x3d, 0x3f, 0x43, 0x36, 0x3f, 0x3f, 0x43, 0x39, 0x44, + 0x3e, 0x3e, 0x45, 0x40, 0x3e, 0x44, 0x3b, 0x3e, 0x42, 0x42, 0x3b, 0x3d, + 0x3a, 0x40, 0x39, 0x3a, 0x32, 0x36, 0x41, 0x30, 0x39, 0x46, 0x33, 0x3f, + 0x46, 0x40, 0x3c, 0x31, 0x41, 0x3a, 0x3f, 0x3f, 0x3b, 0x36, 0x3f, 0x38, + 0x36, 0x3e, 0x35, 0x35, 0x3b, 0x3d, 0x3f, 0x39, 0x46, 0x37, 0x3a, 0x47, + 0x37, 0x39, 0x2c, 0x55, 0x40, 0x4b, 0x4a, 0x39, 0x35, 0x42, 0x3d, 0x40, + 0x3a, 0x54, 0x41, 0x48, 0x51, 0x3b, 0x61, 0x3e, 0x3e, 0x4d, 0x51, 0x52, + 0x3e, 0x43, 0x52, 0x41, 0x48, 0x2d, 0x35, 0x35, 0x4b, 0x44, 0x4d, 0x3c, + 0x54, 0x33, 0x39, 0x27, 0x4a, 0x44, 0x4a, 0x41, 0x3c, 0x3a, 0x31, 0x2f, + 0x3d, 0x42, 0x48, 0x3f, 0x42, 0x40, 0x44, 0x3b, 0x40, 0x3e, 0x49, 0x3a, + 0x3c, 0x35, 0x30, 0x3e, 0x3e, 0x3d, 0x36, 0x3a, 0x3e, 0x3a, 0x4a, 0x3e, + 0x3d, 0x49, 0x40, 0x43, 0x3e, 0x45, 0x3f, 0x3c, 0x3b, 0x42, 0x3a, 0x39, + 0x3b, 0x47, 0x3f, 0x39, 0x49, 0x46, 0x3d, 0x34, 0x32, 0x44, 0x46, 0x42, + 0x47, 0x39, 0x49, 0x48, 0x3b, 0x38, 0x45, 0x45, 0x37, 0x38, 0x46, 0x46, + 0x37, 0x42, 0x35, 0x34, 0x45, 0x42, 0x35, 0x43, 0x3b, 0x3a, 0x43, 0x43, + 0x40, 0x42, 0x35, 0x3f, 0x38, 0x3f, 0x3a, 0x3a, 0x3b, 0x3f, 0x3e, 0x36, + 0x3f, 0x3c, 0x48, 0x3b, 0x3a, 0x41, 0x41, 0x35, 0x33, 0x3f, 0x3b, 0x45, + 0x48, 0x36, 0x40, 0x38, 0x47, 0x3d, 0x35, 0x40, 0x41, 0x42, 0x41, 0x37, + 0x41, 0x3e, 0x36, 0x48, 0x3e, 0x3c, 0x32, 0x39, 0x41, 0x40, 0x38, 0x3f, + 0x46, 0x43, 0x33, 0x40, 0x43, 0x43, 0x3a, 0x49, 0x3f, 0x35, 0x2c, 0x5d, + 0x43, 0x49, 0x52, 0x3b, 0x3c, 0x41, 0x40, 0x4a, 0x33, 0x50, 0x41, 0x46, + 0x52, 0x41, 0x68, 0x48, 0x44, 0x53, 0x54, 0x55, 0x42, 0x42, 0x57, 0x44, + 0x47, 0x35, 0x35, 0x3e, 0x4b, 0x44, 0x4e, 0x38, 0x55, 0x2f, 0x36, 0x2d, + 0x40, 0x48, 0x4b, 0x41, 0x48, 0x36, 0x32, 0x32, 0x44, 0x42, 0x47, 0x42, + 0x48, 
0x3d, 0x3d, 0x39, 0x3e, 0x35, 0x4b, 0x39, 0x38, 0x3a, 0x39, 0x46, + 0x38, 0x3f, 0x3a, 0x42, 0x4b, 0x45, 0x3e, 0x32, 0x46, 0x43, 0x3b, 0x40, + 0x45, 0x41, 0x3e, 0x43, 0x37, 0x3d, 0x43, 0x3b, 0x46, 0x48, 0x42, 0x3b, + 0x3d, 0x48, 0x4a, 0x3c, 0x3b, 0x42, 0x40, 0x3c, 0x3a, 0x42, 0x38, 0x47, + 0x3b, 0x3b, 0x3d, 0x41, 0x3f, 0x38, 0x3f, 0x4a, 0x44, 0x3f, 0x47, 0x3a, + 0x47, 0x44, 0x43, 0x43, 0x34, 0x3d, 0x3a, 0x3c, 0x47, 0x3f, 0x3e, 0x39, + 0x42, 0x4a, 0x40, 0x36, 0x40, 0x41, 0x42, 0x3f, 0x3f, 0x43, 0x39, 0x38, + 0x3c, 0x3b, 0x4c, 0x2f, 0x41, 0x39, 0x40, 0x42, 0x3f, 0x42, 0x40, 0x36, + 0x3b, 0x45, 0x41, 0x41, 0x44, 0x45, 0x42, 0x37, 0x3d, 0x3a, 0x33, 0x3e, + 0x3b, 0x3b, 0x3c, 0x3d, 0x38, 0x49, 0x44, 0x39, 0x3f, 0x48, 0x3d, 0x41, + 0x42, 0x43, 0x44, 0x3e, 0x41, 0x3d, 0x32, 0x59, 0x45, 0x4b, 0x4b, 0x38, + 0x37, 0x3d, 0x48, 0x42, 0x3d, 0x52, 0x43, 0x46, 0x54, 0x48, 0x67, 0x4d, + 0x45, 0x4e, 0x49, 0x52, 0x45, 0x45, 0x58, 0x3b, 0x41, 0x38, 0x3f, 0x3f, + 0x49, 0x44, 0x4f, 0x48, 0x57, 0x31, 0x3c, 0x2a, 0x3e, 0x4c, 0x41, 0x40, + 0x47, 0x3f, 0x33, 0x34, 0x3f, 0x42, 0x48, 0x43, 0x4b, 0x38, 0x39, 0x3d, + 0x3f, 0x3e, 0x4b, 0x3f, 0x35, 0x36, 0x3c, 0x46, 0x3c, 0x45, 0x37, 0x3b, + 0x3c, 0x39, 0x41, 0x40, 0x41, 0x43, 0x44, 0x41, 0x45, 0x4f, 0x44, 0x43, + 0x44, 0x3c, 0x45, 0x34, 0x42, 0x45, 0x3f, 0x46, 0x3f, 0x43, 0x3d, 0x3a, + 0x39, 0x47, 0x45, 0x3d, 0x3f, 0x3b, 0x3d, 0x42, 0x38, 0x48, 0x48, 0x3b, + 0x3c, 0x3a, 0x3f, 0x41, 0x44, 0x4b, 0x44, 0x48, 0x41, 0x3c, 0x3d, 0x3c, + 0x3e, 0x3a, 0x4a, 0x3b, 0x49, 0x35, 0x3a, 0x3d, 0x41, 0x3f, 0x49, 0x39, + 0x44, 0x37, 0x3f, 0x3c, 0x42, 0x40, 0x4a, 0x46, 0x39, 0x38, 0x46, 0x37, + 0x41, 0x46, 0x41, 0x45, 0x40, 0x3b, 0x3b, 0x33, 0x3b, 0x39, 0x3c, 0x43, + 0x37, 0x3c, 0x44, 0x3d, 0x46, 0x39, 0x3c, 0x3c, 0x44, 0x48, 0x41, 0x44, + 0x41, 0x43, 0x46, 0x3b, 0x47, 0x41, 0x31, 0x41, 0x44, 0x40, 0x43, 0x42, + 0x3e, 0x43, 0x34, 0x65, 0x4f, 0x50, 0x4d, 0x3a, 0x37, 0x43, 0x4d, 0x4a, + 0x3d, 0x54, 0x40, 0x42, 0x5b, 0x3b, 0x71, 0x49, 0x44, 0x4f, 0x54, 0x56, + 0x48, 
0x40, 0x52, 0x41, 0x42, 0x38, 0x3c, 0x49, 0x4a, 0x45, 0x51, 0x35, + 0x54, 0x2f, 0x35, 0x25, 0x4d, 0x3f, 0x4d, 0x43, 0x49, 0x33, 0x32, 0x3a, + 0x46, 0x48, 0x48, 0x3d, 0x43, 0x3a, 0x3c, 0x3a, 0x48, 0x40, 0x4b, 0x3b, + 0x45, 0x3b, 0x3f, 0x38, 0x37, 0x41, 0x31, 0x3b, 0x41, 0x43, 0x43, 0x37, + 0x48, 0x3f, 0x48, 0x37, 0x40, 0x4a, 0x43, 0x45, 0x3d, 0x39, 0x37, 0x37, + 0x3c, 0x3f, 0x47, 0x48, 0x43, 0x3e, 0x41, 0x3f, 0x3e, 0x38, 0x3e, 0x37, + 0x45, 0x45, 0x35, 0x44, 0x38, 0x3a, 0x49, 0x43, 0x40, 0x41, 0x40, 0x44, + 0x3c, 0x3e, 0x40, 0x38, 0x42, 0x41, 0x3c, 0x41, 0x3a, 0x3b, 0x3c, 0x3a, + 0x49, 0x3c, 0x42, 0x44, 0x3f, 0x39, 0x45, 0x32, 0x45, 0x43, 0x45, 0x39, + 0x43, 0x41, 0x4b, 0x39, 0x32, 0x3c, 0x3c, 0x36, 0x39, 0x3f, 0x46, 0x32, + 0x39, 0x35, 0x4f, 0x32, 0x3e, 0x40, 0x3d, 0x3e, 0x3a, 0x39, 0x4c, 0x38, + 0x43, 0x38, 0x49, 0x3b, 0x33, 0x39, 0x3b, 0x36, 0x36, 0x43, 0x3b, 0x3c, + 0x32, 0x3c, 0x3a, 0x45, 0x31, 0x3d, 0x37, 0x40, 0x3f, 0x3f, 0x35, 0xff, + 0x49, 0x4e, 0x4c, 0x3c, 0x36, 0x43, 0x46, 0x45, 0x41, 0x59, 0x44, 0x4a, + 0x53, 0x44, 0x71, 0x4a, 0x39, 0x4f, 0x50, 0x4b, 0x47, 0x42, 0x5a, 0x3c, + 0x45, 0x38, 0x3e, 0x42, 0x53, 0x43, 0x52, 0x3a, 0x52, 0x34, 0x31, 0x20, + 0x49, 0x4e, 0x46, 0x43, 0x4b, 0x3d, 0x2b, 0x27, 0x46, 0x46, 0x47, 0x41, + 0x42, 0x37, 0x39, 0x38, 0x45, 0x3f, 0x51, 0x3d, 0x48, 0x3f, 0x33, 0x3f, + 0x38, 0x45, 0x31, 0x38, 0x41, 0x3d, 0x47, 0x39, 0x42, 0x40, 0x4c, 0x3f, + 0x40, 0x42, 0x41, 0x41, 0x41, 0x42, 0x39, 0x35, 0x3f, 0x46, 0x45, 0x36, + 0x3f, 0x43, 0x3b, 0x39, 0x41, 0x38, 0x43, 0x37, 0x3d, 0x44, 0x3b, 0x40, + 0x36, 0x3d, 0x42, 0x41, 0x41, 0x3d, 0x38, 0x4a, 0x40, 0x4a, 0x4c, 0x38, + 0x3f, 0x40, 0x45, 0x3c, 0x3f, 0x4b, 0x43, 0x41, 0x43, 0x3e, 0x43, 0x3f, + 0x36, 0x40, 0x40, 0x39, 0x3f, 0x3a, 0x3a, 0x30, 0x41, 0x3c, 0x3c, 0x34, + 0x46, 0x38, 0x43, 0x34, 0x3a, 0x42, 0x43, 0x42, 0x40, 0x41, 0x49, 0x34, + 0x35, 0x40, 0x47, 0x3d, 0x3d, 0x3e, 0x4c, 0x33, 0x3c, 0x3b, 0x39, 0x43, + 0x3a, 0x3e, 0x3b, 0x37, 0x3f, 0x42, 0x31, 0x3d, 0x41, 0x3e, 0x32, 0x47, + 0x34, 
0x41, 0x3d, 0x35, 0x39, 0x40, 0x38, 0x69, 0x4f, 0x4a, 0x49, 0x37, + 0x37, 0x44, 0x43, 0x46, 0x40, 0x58, 0x43, 0x48, 0x54, 0x46, 0x6c, 0x50, + 0x3a, 0x50, 0x50, 0x57, 0x47, 0x46, 0x5c, 0x40, 0x40, 0x39, 0x3e, 0x46, + 0x53, 0x46, 0x5c, 0x36, 0x4f, 0x32, 0x30, 0x2d, 0x4a, 0x48, 0x41, 0x45, + 0x47, 0x2f, 0x32, 0x2b, 0x43, 0x40, 0x43, 0x3c, 0x40, 0x44, 0x3e, 0x37, + 0x39, 0x3e, 0x48, 0x42, 0x45, 0x36, 0x47, 0x3f, 0x3b, 0x41, 0x35, 0x35, + 0x3b, 0x3e, 0x35, 0x43, 0x3e, 0x41, 0x3d, 0x36, 0x41, 0x3c, 0x40, 0x44, + 0x3d, 0x40, 0x35, 0x32, 0x48, 0x3e, 0x39, 0x42, 0x44, 0x3d, 0x39, 0x3b, + 0x3b, 0x45, 0x40, 0x4a, 0x3f, 0x41, 0x43, 0x39, 0x42, 0x44, 0x4c, 0x3c, + 0x3f, 0x3e, 0x3f, 0x43, 0x40, 0x42, 0x4c, 0x3b, 0x3e, 0x3d, 0x49, 0x42, + 0x40, 0x44, 0x40, 0x34, 0x36, 0x40, 0x45, 0x39, 0x42, 0x40, 0x3e, 0x44, + 0x45, 0x37, 0x3c, 0x38, 0x3e, 0x49, 0x3e, 0x3c, 0x41, 0x3d, 0x42, 0x32, + 0x40, 0x45, 0x3e, 0x36, 0x44, 0x3a, 0x4e, 0x38, 0x43, 0x38, 0x40, 0x38, + 0x49, 0x42, 0x40, 0x3d, 0x42, 0x48, 0x48, 0x3d, 0x41, 0x3a, 0x3f, 0x41, + 0x38, 0x3c, 0x44, 0x39, 0x3a, 0x32, 0x3a, 0x3e, 0x3d, 0x3b, 0x39, 0x38, + 0x3a, 0x43, 0x3a, 0x6b, 0x45, 0x50, 0x47, 0x33, 0x38, 0x48, 0x4d, 0x4f, + 0x39, 0x4b, 0x46, 0x4a, 0x4f, 0x42, 0x6f, 0x4b, 0x40, 0x55, 0x54, 0x50, + 0x42, 0x47, 0x5e, 0x46, 0x40, 0x34, 0x40, 0x47, 0x52, 0x46, 0x55, 0x3b, + 0x4f, 0x2b, 0x35, 0x33, 0x4c, 0x44, 0x44, 0x48, 0x47, 0x37, 0x35, 0x27, + 0x4a, 0x3b, 0x41, 0x40, 0x40, 0x3e, 0x36, 0x39, 0x3e, 0x3c, 0x45, 0x3f, + 0x4d, 0x41, 0x3d, 0x48, 0x47, 0x46, 0x33, 0x3d, 0x3d, 0x3e, 0x34, 0x3f, + 0x3e, 0x3a, 0x41, 0x35, 0x3b, 0x3e, 0x42, 0x3c, 0x42, 0x42, 0x40, 0x31, + 0x37, 0x40, 0x36, 0x42, 0x48, 0x39, 0x3d, 0x3c, 0x3a, 0x43, 0x39, 0x3d, + 0x47, 0x49, 0x43, 0x3d, 0x45, 0x39, 0x44, 0x37, 0x3e, 0x4d, 0x3d, 0x40, + 0x3d, 0x4c, 0x4d, 0x44, 0x3c, 0x3d, 0x46, 0x41, 0x41, 0x42, 0x40, 0x40, + 0x41, 0x3a, 0x3c, 0x3b, 0x3c, 0x44, 0x40, 0x34, 0x44, 0x38, 0x3b, 0x33, + 0x45, 0x45, 0x44, 0x3f, 0x3e, 0x3a, 0x3b, 0x3b, 0x43, 0x39, 0x3a, 0x45, + 0x3b, 
0x3a, 0x4b, 0x39, 0x3d, 0x38, 0x41, 0x39, 0x42, 0x45, 0x43, 0x40, + 0x3e, 0x35, 0x44, 0x3f, 0x45, 0x41, 0x40, 0x3e, 0x43, 0x42, 0x37, 0x3a, + 0x38, 0x35, 0x3a, 0x48, 0x3e, 0x3b, 0x40, 0x38, 0x3c, 0x3c, 0x3b, 0x6a, + 0x48, 0x4d, 0x4d, 0x34, 0x38, 0x40, 0x4a, 0x45, 0x3c, 0x4f, 0x41, 0x4b, + 0x58, 0x46, 0x71, 0x49, 0x3d, 0x53, 0x44, 0x52, 0x42, 0x3e, 0x57, 0x4c, + 0x4c, 0x38, 0x40, 0x3b, 0x5c, 0x4c, 0x52, 0x3e, 0x4c, 0x2d, 0x32, 0x37, + 0x49, 0x3f, 0x41, 0x47, 0x4a, 0x3b, 0x2f, 0x26, 0x45, 0x40, 0x47, 0x42, + 0x3d, 0x39, 0x2d, 0x2c, 0x3f, 0x45, 0x46, 0x44, 0x48, 0x43, 0x42, 0x48, + 0x40, 0x41, 0x3b, 0x3b, 0x41, 0x3b, 0x39, 0x40, 0x3b, 0x47, 0x3f, 0x38, + 0x3f, 0x49, 0x3b, 0x35, 0x40, 0x45, 0x38, 0x35, 0x36, 0x34, 0x3e, 0x3d, + 0x46, 0x3e, 0x33, 0x38, 0x43, 0x48, 0x3f, 0x45, 0x31, 0x44, 0x38, 0x35, + 0x3c, 0x41, 0x4b, 0x44, 0x3d, 0x43, 0x38, 0x48, 0x3c, 0x39, 0x4a, 0x42, + 0x3d, 0x43, 0x3f, 0x49, 0x3e, 0x47, 0x49, 0x41, 0x3b, 0x3c, 0x47, 0x3a, + 0x3d, 0x40, 0x4a, 0x38, 0x3d, 0x3b, 0x47, 0x3a, 0x36, 0x47, 0x42, 0x46, + 0x3c, 0x3d, 0x45, 0x3b, 0x48, 0x3f, 0x38, 0x36, 0x39, 0x46, 0x43, 0x3a, + 0x41, 0x3d, 0x39, 0x39, 0x46, 0x37, 0x3f, 0x3f, 0x3a, 0x46, 0x3f, 0x39, + 0x49, 0x44, 0x42, 0x3a, 0x3a, 0x43, 0x3e, 0x42, 0x3d, 0x3d, 0x43, 0x40, + 0x43, 0x3c, 0x3f, 0x43, 0x40, 0x42, 0x3b, 0x57, 0x4a, 0x4f, 0x4a, 0x2d, + 0x3b, 0x48, 0x45, 0x42, 0x34, 0x4c, 0x3e, 0x4f, 0x4d, 0x40, 0x6c, 0x4b, + 0x3b, 0x4d, 0x4c, 0x57, 0x49, 0x3d, 0x5d, 0x44, 0x43, 0x29, 0x42, 0x3f, + 0x5b, 0x47, 0x4f, 0x3e, 0x54, 0x2e, 0x34, 0x34, 0x4b, 0x47, 0x46, 0x46, + 0x4b, 0x34, 0x36, 0x28, 0x3e, 0x3f, 0x42, 0x40, 0x3b, 0x38, 0x39, 0x42, + 0x49, 0x3d, 0x49, 0x47, 0x47, 0x3b, 0x43, 0x34, 0x39, 0x36, 0x42, 0x3d, + 0x37, 0x40, 0x37, 0x38, 0x46, 0x42, 0x49, 0x37, 0x44, 0x3f, 0x38, 0x3e, + 0x36, 0x32, 0x33, 0x38, 0x40, 0x46, 0x42, 0x34, 0x41, 0x42, 0x3e, 0x38, + 0x44, 0x3e, 0x3f, 0x43, 0x3f, 0x43, 0x35, 0x3f, 0x4d, 0x3b, 0x43, 0x39, + 0x40, 0x47, 0x3f, 0x4a, 0x3a, 0x3f, 0x45, 0x45, 0x48, 0x42, 0x3b, 0x47, + 0x42, 
0x4b, 0x47, 0x3e, 0x3c, 0x42, 0x46, 0x39, 0x41, 0x3f, 0x48, 0x33, + 0x45, 0x34, 0x3d, 0x30, 0x40, 0x4c, 0x40, 0x40, 0x39, 0x37, 0x40, 0x33, + 0x49, 0x42, 0x45, 0x38, 0x3c, 0x43, 0x45, 0x35, 0x37, 0x33, 0x34, 0x3b, + 0x3b, 0x38, 0x39, 0x41, 0x42, 0x40, 0x3e, 0x3e, 0x41, 0x33, 0x3a, 0x36, + 0x40, 0x3a, 0x3c, 0x45, 0x43, 0x3c, 0x40, 0x41, 0x49, 0x47, 0x35, 0x34, + 0x3a, 0x3d, 0x3a, 0x68, 0x4f, 0x48, 0x43, 0x36, 0x37, 0x3e, 0x45, 0x49, + 0x3a, 0x4d, 0x41, 0x3d, 0x46, 0x45, 0x65, 0x46, 0x38, 0x4d, 0x4a, 0x53, + 0x43, 0x41, 0x5d, 0x47, 0x41, 0x34, 0x39, 0x43, 0x4e, 0x48, 0x50, 0x38, + 0x53, 0x32, 0x30, 0x2e, 0x49, 0x4c, 0x4d, 0x3f, 0x46, 0x38, 0x34, 0x2b, + 0x44, 0x44, 0x41, 0x41, 0x36, 0x40, 0x3f, 0x32, 0x46, 0x38, 0x50, 0x45, + 0x3f, 0x3d, 0x3b, 0x36, 0x3b, 0x43, 0x3a, 0x34, 0x36, 0x3f, 0x39, 0x35, + 0x3c, 0x40, 0x40, 0x37, 0x3c, 0x39, 0x3d, 0x36, 0x48, 0x3d, 0x43, 0x34, + 0x3b, 0x46, 0x43, 0x41, 0x33, 0x3e, 0x44, 0x3d, 0x44, 0x44, 0x4c, 0x3c, + 0x37, 0x49, 0x42, 0x35, 0x45, 0x3a, 0x3c, 0x41, 0x3a, 0x45, 0x46, 0x41, + 0x3c, 0x48, 0x46, 0x36, 0x36, 0x42, 0x3b, 0x46, 0x42, 0x45, 0x44, 0x47, + 0x3f, 0x44, 0x3a, 0x35, 0x37, 0x46, 0x40, 0x38, 0x40, 0x3d, 0x36, 0x2c, + 0x34, 0x47, 0x40, 0x38, 0x3f, 0x3f, 0x44, 0x2d, 0x3b, 0x3d, 0x3e, 0x44, + 0x3c, 0x40, 0x3e, 0x33, 0x3c, 0x3a, 0x49, 0x40, 0x42, 0x42, 0x3a, 0x3b, + 0x33, 0x3d, 0x3c, 0x43, 0x3e, 0x3d, 0x3a, 0x3a, 0x48, 0x3e, 0x3c, 0x39, + 0x3f, 0x44, 0x37, 0x40, 0x3f, 0x3c, 0x3e, 0x3d, 0x38, 0x42, 0x34, 0x62, + 0x51, 0x47, 0x44, 0x3f, 0x32, 0x3c, 0x3f, 0x46, 0x3d, 0x46, 0x3e, 0x45, + 0x4a, 0x3e, 0x5d, 0x43, 0x45, 0x49, 0x4a, 0x55, 0x41, 0x3c, 0x5a, 0x44, + 0x43, 0x3b, 0x3c, 0x3a, 0x4b, 0x4e, 0x4d, 0x42, 0x49, 0x30, 0x3b, 0x38, + 0x42, 0x44, 0x51, 0x40, 0x48, 0x33, 0x3f, 0x2b, 0x3c, 0x41, 0x3c, 0x45, + 0x35, 0x39, 0x42, 0x37, 0x40, 0x46, 0x46, 0x3f, 0x41, 0x45, 0x42, 0x3d, + 0x43, 0x38, 0x3e, 0x38, 0x3c, 0x39, 0x40, 0x38, 0x37, 0x36, 0x3d, 0x3d, + 0x38, 0x47, 0x45, 0x3b, 0x45, 0x44, 0x42, 0x2e, 0x37, 0x40, 0x42, 0x42, + 0x3c, 
0x36, 0x3b, 0x39, 0x44, 0x4d, 0x42, 0x3f, 0x3a, 0x3e, 0x45, 0x34, + 0x3c, 0x43, 0x47, 0x43, 0x3f, 0x48, 0x3b, 0x44, 0x3d, 0x44, 0x43, 0x3e, + 0x40, 0x4a, 0x31, 0x42, 0x42, 0x43, 0x48, 0x45, 0x3a, 0x42, 0x36, 0x2f, + 0x3c, 0x3e, 0x3b, 0x3b, 0x44, 0x3f, 0x3a, 0x2c, 0x47, 0x3f, 0x4a, 0x40, + 0x40, 0x40, 0x3c, 0x2a, 0x3e, 0x44, 0x40, 0x43, 0x3a, 0x42, 0x39, 0x34, + 0x49, 0x3e, 0x36, 0x42, 0x3f, 0x42, 0x33, 0x3b, 0x3c, 0x45, 0x39, 0x3f, + 0x3e, 0x3f, 0x41, 0x3d, 0x32, 0x3b, 0x31, 0x40, 0x3f, 0x44, 0x3c, 0x3f, + 0x40, 0x46, 0x45, 0x36, 0x36, 0x42, 0x30, 0x57, 0x47, 0x44, 0x48, 0x3f, + 0x35, 0x37, 0x3f, 0x3f, 0x38, 0x4a, 0x41, 0x46, 0x50, 0x3d, 0x5b, 0x41, + 0x3e, 0x3c, 0x4a, 0x54, 0x45, 0x41, 0x5b, 0x46, 0x3d, 0x3b, 0x43, 0x33, + 0x45, 0x4e, 0x43, 0x3b, 0x44, 0x37, 0x37, 0x32, 0x4c, 0x3d, 0x4c, 0x3f, + 0x49, 0x3b, 0x37, 0x3a, 0x33, 0x43, 0x3f, 0x40, 0x44, 0x36, 0x3b, 0x44, + 0x45, 0x40, 0x3c, 0x3c, 0x41, 0x44, 0x3b, 0x3d, 0x33, 0x37, 0x3c, 0x35, + 0x3d, 0x3f, 0x39, 0x38, 0x33, 0x43, 0x3e, 0x39, 0x3b, 0x3e, 0x41, 0x35, + 0x40, 0x46, 0x43, 0x35, 0x41, 0x3d, 0x32, 0x39, 0x3c, 0x40, 0x3e, 0x3f, + 0x42, 0x38, 0x3b, 0x45, 0x3a, 0x3d, 0x40, 0x36, 0x3a, 0x40, 0x46, 0x44, + 0x48, 0x45, 0x3f, 0x3a, 0x45, 0x45, 0x3c, 0x3b, 0x40, 0x4c, 0x39, 0x3a, + 0x38, 0x39, 0x46, 0x3a, 0x3e, 0x4b, 0x34, 0x39, 0x3d, 0x3f, 0x40, 0x39, + 0x45, 0x31, 0x45, 0x29, 0x3f, 0x38, 0x3a, 0x3f, 0x38, 0x3b, 0x36, 0x2d, + 0x43, 0x3d, 0x45, 0x3c, 0x46, 0x3f, 0x40, 0x3c, 0x3a, 0x3e, 0x3d, 0x38, + 0x3f, 0x3c, 0x3f, 0x42, 0x35, 0x3f, 0x3a, 0x43, 0x3d, 0x43, 0x3d, 0x33, + 0x3d, 0x48, 0x42, 0x3d, 0x45, 0x46, 0x3d, 0x35, 0x32, 0x44, 0x42, 0x37, + 0x3d, 0x40, 0x3c, 0x47, 0x4a, 0x45, 0x47, 0x2f, 0x33, 0x36, 0x3f, 0x42, + 0x38, 0x43, 0x3e, 0x3a, 0x41, 0x3f, 0x5f, 0x3f, 0x48, 0x3a, 0x44, 0x47, + 0x41, 0x3e, 0x57, 0x42, 0x41, 0x33, 0x34, 0x39, 0x42, 0x44, 0x42, 0x3c, + 0x49, 0x34, 0x37, 0x33, 0x47, 0x38, 0x43, 0x3d, 0x43, 0x3e, 0x3e, 0x36, + 0x41, 0x41, 0x37, 0x40, 0x39, 0x3e, 0x3b, 0x3b, 0x3e, 0x41, 0x3d, 0x3b, + 0x43, 
0x3e, 0x39, 0x43, 0x2f, 0x3e, 0x33, 0x40, 0x45, 0x47, 0x30, 0x46, + 0x3f, 0x3f, 0x37, 0x42, 0x3d, 0x42, 0x43, 0x37, 0x38, 0x3c, 0x35, 0x34, + 0x41, 0x43, 0x3e, 0x3e, 0x3f, 0x49, 0x35, 0x35, 0x38, 0x36, 0x3a, 0x43, + 0x38, 0x46, 0x48, 0x36, 0x3f, 0x39, 0x3b, 0x3e, 0x48, 0x47, 0x41, 0x34, + 0x3b, 0x3c, 0x37, 0x3e, 0x40, 0x41, 0x3b, 0x3d, 0x43, 0x42, 0x3a, 0x39, + 0x3b, 0x43, 0x38, 0x2b, 0x43, 0x41, 0x48, 0x35, 0x44, 0x44, 0x3e, 0x2c, + 0x46, 0x40, 0x3e, 0x41, 0x38, 0x34, 0x35, 0x37, 0x34, 0x3f, 0x3d, 0x46, + 0x33, 0x3c, 0x3c, 0x2e, 0x3b, 0x45, 0x3d, 0x3e, 0x3a, 0x42, 0x3c, 0x36, + 0x3a, 0x42, 0x39, 0x43, 0x35, 0x39, 0x40, 0x44, 0x47, 0x41, 0x44, 0x3d, + 0x41, 0x3e, 0x38, 0x39, 0x45, 0x3a, 0x35, 0x43, 0x3f, 0x44, 0x41, 0x49, + 0x47, 0x3f, 0x44, 0x40, 0x38, 0x43, 0x40, 0x3e, 0x39, 0x42, 0x32, 0x3b, + 0x42, 0x47, 0x57, 0x37, 0x36, 0x38, 0x43, 0x49, 0x3b, 0x34, 0x54, 0x42, + 0x3d, 0x3f, 0x3e, 0x3b, 0x38, 0x41, 0x43, 0x3a, 0x44, 0x39, 0x34, 0x2c, + 0x38, 0x43, 0x4b, 0x3f, 0x40, 0x3e, 0x32, 0x33, 0x3d, 0x44, 0x45, 0x44, + 0x3e, 0x35, 0x37, 0x39, 0x40, 0x3e, 0x40, 0x3c, 0x34, 0x43, 0x37, 0x40, + 0x39, 0x3e, 0x3d, 0x43, 0x3a, 0x44, 0x43, 0x44, 0x3d, 0x3b, 0x45, 0x3b, + 0x3a, 0x3a, 0x3f, 0x37, 0x43, 0x3b, 0x33, 0x35, 0x40, 0x47, 0x3e, 0x3c, + 0x39, 0x3c, 0x34, 0x29, 0x3c, 0x3e, 0x46, 0x3e, 0x3c, 0x38, 0x3f, 0x2d, + 0x3d, 0x3d, 0x3f, 0x3f, 0x3d, 0x45, 0x3b, 0x32, 0x39, 0x3f, 0x41, 0x38, + 0x36, 0x3e, 0x3a, 0x35, 0x40, 0x3f, 0x3b, 0x32, 0x3c, 0x39, 0x3e, 0x35, + 0x3e, 0x45, 0x34, 0x38, 0x44, 0x39, 0x3f, 0x31, 0x34, 0x39, 0x3f, 0x38, + 0x44, 0x42, 0x3f, 0x3b, 0x39, 0x3d, 0x39, 0x3b, 0x44, 0x46, 0x38, 0x3d, + 0x45, 0x37, 0x40, 0x3a, 0x3a, 0x39, 0x35, 0x3c, 0x39, 0x40, 0x47, 0x3e, + 0x38, 0x42, 0x41, 0x3b, 0x48, 0x3f, 0x3a, 0x3e, 0x3d, 0x3f, 0x32, 0x3b, + 0x3f, 0x3d, 0x3e, 0x44, 0x43, 0x41, 0x44, 0x47, 0x48, 0x41, 0x41, 0x36, + 0x3a, 0x33, 0x3c, 0x3c, 0x37, 0x3e, 0x40, 0x34, 0x3f, 0x42, 0x53, 0x40, + 0x3f, 0x35, 0x3e, 0x46, 0x3a, 0x3e, 0x4b, 0x41, 0x46, 0x32, 0x39, 0x36, + 0x3b, 
0x4f, 0x36, 0x3c, 0x40, 0x3a, 0x40, 0x40, 0x47, 0x3e, 0x49, 0x37, + 0x3f, 0x31, 0x3e, 0x40, 0x3b, 0x3f, 0x43, 0x44, 0x3a, 0x3d, 0x31, 0x41, + 0x41, 0x33, 0x43, 0x40, 0x3c, 0x3a, 0x41, 0x40, 0x37, 0x3f, 0x34, 0x3e, + 0x44, 0x42, 0x3d, 0x3f, 0x3f, 0x34, 0x36, 0x34, 0x31, 0x41, 0x32, 0x39, + 0x3e, 0x3d, 0x42, 0x35, 0x3e, 0x3a, 0x41, 0x47, 0x3d, 0x42, 0x33, 0x32, + 0x43, 0x42, 0x36, 0x41, 0x3e, 0x39, 0x46, 0x39, 0x35, 0x3d, 0x3d, 0x40, + 0x38, 0x44, 0x3d, 0x31, 0x44, 0x39, 0x3a, 0x45, 0x42, 0x41, 0x3d, 0x36, + 0x3f, 0x3c, 0x39, 0x3d, 0x32, 0x39, 0x42, 0x34, 0x3f, 0x38, 0x44, 0x3c, + 0x43, 0x45, 0x41, 0x2d, 0x44, 0x42, 0x3d, 0x3f, 0x44, 0x38, 0x3d, 0x35, + 0x3a, 0x48, 0x40, 0x3b, 0x3d, 0x36, 0x3b, 0x40, 0x3f, 0x3a, 0x3a, 0x3f, + 0x3c, 0x33, 0x39, 0x3c, 0x3c, 0x38, 0x47, 0x36, 0x3d, 0x41, 0x46, 0x41, + 0x34, 0x46, 0x48, 0x46, 0x3d, 0x3c, 0x40, 0x43, 0x3d, 0x41, 0x37, 0x3e, + 0x39, 0x47, 0x3f, 0x39, 0x46, 0x43, 0x3f, 0x41, 0x45, 0x37, 0x40, 0x3a, + 0x3d, 0x44, 0x3f, 0x3b, 0x3b, 0x40, 0x4f, 0x3d, 0x3d, 0x41, 0x3c, 0x43, + 0x3e, 0x46, 0x4e, 0x40, 0x3f, 0x34, 0x48, 0x29, 0x45, 0x44, 0x46, 0x41, + 0x45, 0x32, 0x3e, 0x38, 0x39, 0x3a, 0x3e, 0x3e, 0x4c, 0x34, 0x3c, 0x40, + 0x4a, 0x44, 0x3d, 0x46, 0x3b, 0x3e, 0x42, 0x42, 0x3a, 0x41, 0x43, 0x41, + 0x39, 0x3f, 0x3e, 0x3c, 0x36, 0x48, 0x3f, 0x3e, 0x3e, 0x37, 0x3f, 0x3f, + 0x3b, 0x40, 0x3e, 0x35, 0x32, 0x35, 0x3f, 0x33, 0x3f, 0x38, 0x43, 0x37, + 0x49, 0x38, 0x37, 0x3c, 0x3c, 0x40, 0x40, 0x3a, 0x3a, 0x46, 0x37, 0x34, + 0x34, 0x3b, 0x3d, 0x2f, 0x3a, 0x38, 0x3d, 0x46, 0x3d, 0x3b, 0x3d, 0x38, + 0x35, 0x37, 0x44, 0x3c, 0x3d, 0x3e, 0x40, 0x3a, 0x40, 0x33, 0x3e, 0x38, + 0x40, 0x3e, 0x45, 0x37, 0x3f, 0x3b, 0x3c, 0x40, 0x3b, 0x3c, 0x3b, 0x33, + 0x41, 0x3f, 0x3b, 0x42, 0x31, 0x3b, 0x3a, 0x39, 0x3d, 0x41, 0x39, 0x40, + 0x43, 0x45, 0x39, 0x3b, 0x3a, 0x42, 0x43, 0x3d, 0x3f, 0x40, 0x47, 0x39, + 0x37, 0x3f, 0x47, 0x3f, 0x45, 0x41, 0x39, 0x3a, 0x41, 0x38, 0x3c, 0x3c, + 0x39, 0x40, 0x39, 0x3b, 0x3b, 0x3e, 0x38, 0x3b, 0x37, 0x48, 0x41, 0x3f, + 0x3e, 
0x37, 0x3d, 0x44, 0x3c, 0x3e, 0x40, 0x39, 0x41, 0x42, 0x3d, 0x45, + 0x3b, 0x3e, 0x4c, 0x3b, 0x3a, 0x3a, 0x3e, 0x47, 0x3c, 0x3f, 0x48, 0x3f, + 0x46, 0x3f, 0x39, 0x25, 0x44, 0x3a, 0x3b, 0x40, 0x41, 0x39, 0x39, 0x47, + 0x3b, 0x32, 0x49, 0x42, 0x41, 0x3a, 0x43, 0x41, 0x3e, 0x35, 0x37, 0x3d, + 0x49, 0x40, 0x45, 0x3b, 0x3c, 0x38, 0x48, 0x3c, 0x3c, 0x35, 0x3f, 0x41, + 0x41, 0x4c, 0x36, 0x39, 0x37, 0x3d, 0x3b, 0x3e, 0x44, 0x32, 0x3d, 0x3f, + 0x3a, 0x3b, 0x3a, 0x47, 0x38, 0x42, 0x36, 0x34, 0x43, 0x3f, 0x3e, 0x40, + 0x34, 0x31, 0x36, 0x33, 0x42, 0x37, 0x41, 0x41, 0x40, 0x3d, 0x3d, 0x37, + 0x43, 0x3a, 0x3e, 0x44, 0x43, 0x3c, 0x35, 0x38, 0x38, 0x3c, 0x43, 0x36, + 0x3a, 0x38, 0x40, 0x3f, 0x3d, 0x3e, 0x37, 0x3b, 0x41, 0x3a, 0x3b, 0x3d, + 0x3c, 0x41, 0x3c, 0x41, 0x47, 0x3f, 0x3f, 0x3b, 0x3d, 0x3f, 0x3b, 0x45, + 0x38, 0x38, 0x40, 0x38, 0x46, 0x42, 0x39, 0x3d, 0x3d, 0x3b, 0x42, 0x36, + 0x42, 0x41, 0x3e, 0x3e, 0x36, 0x3f, 0x37, 0x3f, 0x36, 0x48, 0x3b, 0x39, + 0x3d, 0x3f, 0x43, 0x3e, 0x3c, 0x40, 0x48, 0x46, 0x43, 0x36, 0x42, 0x39, + 0x46, 0x3c, 0x37, 0x38, 0x49, 0x37, 0x36, 0x39, 0x3e, 0x42, 0x48, 0x3a, + 0x3c, 0x3e, 0x42, 0x30, 0x3e, 0x34, 0x39, 0x3b, 0x46, 0x61, 0x46, 0x1e, + 0x4c, 0x3b, 0x40, 0x2d, 0x3c, 0x42, 0x32, 0x30, 0x49, 0x3e, 0x39, 0x34, + 0x30, 0x40, 0x31, 0x38, 0x40, 0x3d, 0x3c, 0x35, 0x3a, 0x36, 0x40, 0x3b, + 0x41, 0x40, 0x3b, 0x39, 0x37, 0x37, 0x3f, 0x3b, 0x3c, 0x3a, 0x40, 0x3a, + 0x36, 0x3c, 0x42, 0x39, 0x3e, 0x36, 0x40, 0x42, 0x39, 0x40, 0x3b, 0x34, + 0x37, 0x33, 0x36, 0x3f, 0x43, 0x33, 0x33, 0x27, 0x3d, 0x46, 0x40, 0x31, + 0x38, 0x3e, 0x41, 0x20, 0x3f, 0x39, 0x42, 0x35, 0x35, 0x45, 0x40, 0x1e, + 0x32, 0x35, 0x32, 0x3c, 0x35, 0x44, 0x46, 0x29, 0x3a, 0x3d, 0x37, 0x42, + 0x3b, 0x45, 0x3a, 0x26, 0x38, 0x40, 0x30, 0x37, 0x41, 0x40, 0x39, 0x2b, + 0x49, 0x3f, 0x43, 0x43, 0x40, 0x3a, 0x38, 0x29, 0x43, 0x3a, 0x37, 0x40, + 0x3f, 0x35, 0x3a, 0x28, 0x36, 0x3e, 0x3f, 0x43, 0x3c, 0x39, 0x42, 0x2c, + 0x38, 0x42, 0x38, 0x3d, 0x42, 0x38, 0x35, 0x2d, 0x34, 0x38, 0x3d, 0x43, + 0x46, 
0x3e, 0x3c, 0x27, 0x3e, 0x40, 0x46, 0x39, 0x35, 0x3d, 0x42, 0x35, + 0x42, 0x36, 0x40, 0x3e, 0x3a, 0x3e, 0x3c, 0x37, 0x3a, 0x3c, 0x48, 0x48, + 0x48, 0x37, 0x3d, 0x38, 0x4b, 0x40, 0x43, 0x3b, 0x41, 0x46, 0x3c, 0x34, + 0x46, 0x3c, 0x3c, 0x3c, 0x4b, 0x64, 0x4a, 0x22, 0x52, 0x41, 0x42, 0x3b, + 0x42, 0x4a, 0x34, 0x37, 0x4b, 0x44, 0x3b, 0x4a, 0x38, 0x3f, 0x38, 0x3a, + 0x40, 0x41, 0x42, 0x3c, 0x33, 0x3e, 0x3c, 0x42, 0x2c, 0x4e, 0x47, 0x3f, + 0x38, 0x33, 0x39, 0x3f, 0x3b, 0x45, 0x37, 0x3a, 0x42, 0x42, 0x44, 0x3f, + 0x3c, 0x3c, 0x3e, 0x3d, 0x3c, 0x3c, 0x40, 0x2c, 0x3c, 0x3d, 0x42, 0x39, + 0x3a, 0x37, 0x43, 0x2a, 0x3d, 0x40, 0x41, 0x41, 0x46, 0x46, 0x42, 0x28, + 0x39, 0x3c, 0x37, 0x44, 0x46, 0x41, 0x47, 0x2b, 0x44, 0x33, 0x39, 0x3f, + 0x3f, 0x43, 0x3d, 0x23, 0x3a, 0x43, 0x41, 0x3b, 0x41, 0x42, 0x33, 0x1f, + 0x43, 0x3e, 0x3d, 0x40, 0x37, 0x33, 0x42, 0x28, 0x3b, 0x38, 0x37, 0x3c, + 0x34, 0x40, 0x44, 0x2a, 0x3c, 0x3a, 0x41, 0x37, 0x45, 0x3f, 0x3e, 0x26, + 0x41, 0x40, 0x35, 0x3d, 0x45, 0x3e, 0x3d, 0x29, 0x3c, 0x39, 0x3f, 0x3c, + 0x3d, 0x39, 0x38, 0x2d, 0x39, 0x38, 0x38, 0x44, 0x3c, 0x3e, 0x38, 0x26, + 0x40, 0x36, 0x39, 0x38, 0x3f, 0x32, 0x39, 0x35, 0x3d, 0x3e, 0x35, 0x3a, + 0x3f, 0x3f, 0x31, 0x35, 0x34, 0x45, 0x3e, 0x43, 0x48, 0x3b, 0x37, 0x39, + 0x4d, 0x46, 0x54, 0x40, 0x41, 0x4e, 0x3d, 0x38, 0x4d, 0x38, 0x3a, 0x3b, + 0x49, 0x5a, 0x4a, 0x1e, 0x5e, 0x39, 0x38, 0x37, 0x3a, 0x51, 0x3a, 0x3c, + 0x50, 0x3f, 0x40, 0x42, 0x33, 0x3b, 0x2e, 0x4a, 0x3f, 0x4a, 0x3b, 0x43, + 0x36, 0x3e, 0x3d, 0x42, 0x39, 0x46, 0x4b, 0x3c, 0x3b, 0x3b, 0x35, 0x3e, + 0x3d, 0x4b, 0x3f, 0x41, 0x3f, 0x3b, 0x42, 0x42, 0x38, 0x3a, 0x41, 0x3d, + 0x36, 0x41, 0x37, 0x2f, 0x38, 0x37, 0x3f, 0x34, 0x35, 0x35, 0x45, 0x30, + 0x31, 0x42, 0x31, 0x3a, 0x3a, 0x3e, 0x3d, 0x23, 0x3f, 0x43, 0x3b, 0x41, + 0x35, 0x3b, 0x40, 0x25, 0x45, 0x3e, 0x42, 0x3b, 0x31, 0x40, 0x36, 0x28, + 0x43, 0x42, 0x30, 0x42, 0x32, 0x32, 0x36, 0x2c, 0x35, 0x3a, 0x3d, 0x3a, + 0x3c, 0x36, 0x3e, 0x30, 0x41, 0x42, 0x38, 0x41, 0x41, 0x3e, 0x3c, 0x23, + 0x37, 
0x40, 0x3c, 0x3e, 0x3e, 0x3a, 0x37, 0x2b, 0x36, 0x40, 0x41, 0x42, + 0x3e, 0x38, 0x44, 0x22, 0x46, 0x38, 0x33, 0x3b, 0x3a, 0x3a, 0x3a, 0x24, + 0x36, 0x3b, 0x38, 0x44, 0x34, 0x38, 0x40, 0x28, 0x38, 0x3d, 0x36, 0x44, + 0x31, 0x3e, 0x37, 0x37, 0x36, 0x3f, 0x47, 0x38, 0x3b, 0x3e, 0x2c, 0x4c, + 0x36, 0x3c, 0x3b, 0x41, 0x4c, 0x3d, 0x3d, 0x40, 0x49, 0x44, 0x52, 0x3f, + 0x3b, 0x4d, 0x3c, 0x3a, 0x4f, 0x3b, 0x36, 0x3b, 0x4a, 0x5f, 0x4e, 0x1f, + 0x57, 0x3c, 0x3d, 0x3d, 0x46, 0x59, 0x42, 0x45, 0x52, 0x3d, 0x3a, 0x41, + 0x31, 0x39, 0x39, 0x4f, 0x43, 0x4e, 0x3e, 0x37, 0x3a, 0x37, 0x33, 0x47, + 0x32, 0x45, 0x47, 0x43, 0x31, 0x33, 0x38, 0x43, 0x3e, 0x47, 0x3d, 0x32, + 0x3b, 0x39, 0x3c, 0x42, 0x3d, 0x47, 0x42, 0x40, 0x3d, 0x3f, 0x3c, 0x34, + 0x3b, 0x3e, 0x42, 0x3d, 0x43, 0x35, 0x42, 0x2c, 0x35, 0x3d, 0x3c, 0x3d, + 0x3a, 0x3c, 0x46, 0x25, 0x43, 0x35, 0x3d, 0x39, 0x3a, 0x3c, 0x40, 0x2b, + 0x33, 0x40, 0x3d, 0x46, 0x45, 0x37, 0x3c, 0x36, 0x43, 0x37, 0x3e, 0x3a, + 0x3c, 0x47, 0x3f, 0x38, 0x36, 0x3e, 0x3a, 0x42, 0x3c, 0x42, 0x33, 0x39, + 0x3c, 0x3a, 0x3c, 0x40, 0x48, 0x3b, 0x40, 0x32, 0x37, 0x47, 0x34, 0x38, + 0x33, 0x3d, 0x49, 0x2d, 0x36, 0x42, 0x3d, 0x3e, 0x47, 0x3c, 0x42, 0x2c, + 0x3b, 0x31, 0x3f, 0x3c, 0x3d, 0x3c, 0x3f, 0x2b, 0x41, 0x35, 0x33, 0x43, + 0x47, 0x39, 0x34, 0x2a, 0x3a, 0x3a, 0x40, 0x3d, 0x44, 0x3c, 0x39, 0x34, + 0x43, 0x40, 0x33, 0x3a, 0x3b, 0x42, 0x38, 0x3b, 0x34, 0x35, 0x40, 0x43, + 0x4b, 0x41, 0x3d, 0x38, 0x49, 0x44, 0x4d, 0x37, 0x3a, 0x4b, 0x40, 0x39, + 0x4e, 0x3b, 0x30, 0x38, 0x47, 0x5d, 0x50, 0x1f, 0x54, 0x35, 0x3a, 0x39, + 0x40, 0x4c, 0x46, 0x42, 0x52, 0x39, 0x39, 0x45, 0x41, 0x3c, 0x30, 0x5b, + 0x43, 0x4d, 0x4a, 0x3e, 0x31, 0x39, 0x41, 0x4c, 0x36, 0x44, 0x4c, 0x39, + 0x32, 0x41, 0x47, 0x3e, 0x34, 0x49, 0x45, 0x3b, 0x34, 0x3a, 0x3b, 0x47, + 0x43, 0x3e, 0x43, 0x32, 0x40, 0x3e, 0x3e, 0x38, 0x37, 0x3e, 0x37, 0x3a, + 0x3a, 0x40, 0x48, 0x2f, 0x3e, 0x3e, 0x46, 0x3a, 0x3e, 0x35, 0x49, 0x30, + 0x3a, 0x41, 0x3e, 0x39, 0x34, 0x45, 0x3d, 0x34, 0x48, 0x43, 0x43, 0x42, + 0x33, 
0x39, 0x3b, 0x3f, 0x30, 0x46, 0x41, 0x39, 0x48, 0x3a, 0x3c, 0x3e, + 0x3f, 0x36, 0x40, 0x3d, 0x43, 0x40, 0x3e, 0x39, 0x44, 0x40, 0x44, 0x3b, + 0x43, 0x42, 0x39, 0x38, 0x3a, 0x3f, 0x3b, 0x3f, 0x38, 0x3d, 0x34, 0x30, + 0x34, 0x3d, 0x3f, 0x42, 0x44, 0x3e, 0x34, 0x32, 0x37, 0x46, 0x44, 0x38, + 0x3c, 0x45, 0x39, 0x2b, 0x41, 0x3c, 0x40, 0x40, 0x3a, 0x3a, 0x3c, 0x32, + 0x45, 0x42, 0x3d, 0x46, 0x38, 0x3b, 0x34, 0x35, 0x38, 0x43, 0x3d, 0x34, + 0x42, 0x3b, 0x38, 0x3d, 0x37, 0x43, 0x3f, 0x39, 0x4e, 0x39, 0x40, 0x3f, + 0x4d, 0x43, 0x49, 0x3f, 0x36, 0x41, 0x44, 0x39, 0x48, 0x3a, 0x35, 0x39, + 0x48, 0x59, 0x4e, 0x25, 0x58, 0x39, 0x42, 0x35, 0x43, 0x4e, 0x42, 0x3f, + 0x4a, 0x43, 0x3b, 0x3f, 0x3b, 0x37, 0x2b, 0x5a, 0x3d, 0x44, 0x3b, 0x40, + 0x31, 0x38, 0x37, 0x44, 0x32, 0x3e, 0x41, 0x3d, 0x2c, 0x42, 0x42, 0x3c, + 0x37, 0x45, 0x41, 0x41, 0x3d, 0x39, 0x41, 0x40, 0x3a, 0x46, 0x41, 0x40, + 0x40, 0x3d, 0x38, 0x31, 0x37, 0x3f, 0x42, 0x38, 0x3f, 0x3c, 0x48, 0x30, + 0x3e, 0x39, 0x3f, 0x3d, 0x3d, 0x44, 0x52, 0x35, 0x3b, 0x32, 0x42, 0x32, + 0x3a, 0x43, 0x39, 0x3b, 0x31, 0x43, 0x36, 0x3c, 0x3c, 0x3c, 0x41, 0x45, + 0x42, 0x49, 0x41, 0x3b, 0x42, 0x3e, 0x41, 0x44, 0x36, 0x41, 0x3f, 0x3c, + 0x3e, 0x47, 0x45, 0x41, 0x38, 0x41, 0x3f, 0x43, 0x35, 0x32, 0x41, 0x39, + 0x36, 0x47, 0x35, 0x42, 0x44, 0x3b, 0x3f, 0x34, 0x48, 0x41, 0x43, 0x42, + 0x36, 0x3e, 0x3c, 0x3d, 0x3d, 0x3b, 0x42, 0x44, 0x3a, 0x44, 0x36, 0x2a, + 0x41, 0x39, 0x3a, 0x41, 0x46, 0x3c, 0x44, 0x2f, 0x36, 0x39, 0x3b, 0x3f, + 0x38, 0x45, 0x3c, 0x3c, 0x3e, 0x41, 0x3c, 0x39, 0x3e, 0x40, 0x2f, 0x45, + 0x3b, 0x41, 0x40, 0x3c, 0x4e, 0x38, 0x3e, 0x48, 0x46, 0x40, 0x48, 0x44, + 0x40, 0x4a, 0x45, 0x3c, 0x4f, 0x39, 0x37, 0x3a, 0x4e, 0x59, 0x5c, 0x22, + 0x58, 0x32, 0x38, 0x34, 0x40, 0x4b, 0x43, 0x43, 0x4f, 0x3e, 0x39, 0x40, + 0x37, 0x3e, 0x2f, 0x55, 0x3f, 0x40, 0x38, 0x3f, 0x3a, 0x33, 0x37, 0x3d, + 0x34, 0x4c, 0x37, 0x3f, 0x32, 0x39, 0x45, 0x34, 0x44, 0x4c, 0x3f, 0x3b, + 0x3c, 0x36, 0x36, 0x43, 0x36, 0x47, 0x41, 0x46, 0x41, 0x3e, 0x41, 0x3a, + 0x43, 
0x3a, 0x48, 0x42, 0x42, 0x3e, 0x4c, 0x36, 0x3d, 0x39, 0x43, 0x46, + 0x3d, 0x42, 0x42, 0x3b, 0x45, 0x43, 0x3c, 0x40, 0x39, 0x37, 0x34, 0x45, + 0x3f, 0x40, 0x34, 0x38, 0x43, 0x3f, 0x36, 0x47, 0x3f, 0x3b, 0x49, 0x3c, + 0x3a, 0x3a, 0x42, 0x4c, 0x37, 0x3e, 0x3b, 0x32, 0x47, 0x40, 0x45, 0x4d, + 0x39, 0x3b, 0x39, 0x40, 0x3e, 0x3c, 0x3d, 0x3a, 0x3d, 0x3b, 0x3e, 0x43, + 0x3e, 0x3f, 0x3a, 0x3c, 0x41, 0x40, 0x39, 0x3c, 0x3a, 0x38, 0x39, 0x37, + 0x36, 0x33, 0x43, 0x45, 0x3f, 0x45, 0x41, 0x30, 0x3b, 0x34, 0x3c, 0x39, + 0x3b, 0x45, 0x37, 0x2e, 0x36, 0x34, 0x36, 0x44, 0x3d, 0x40, 0x3a, 0x3c, + 0x3d, 0x3b, 0x38, 0x41, 0x42, 0x3a, 0x32, 0x4b, 0x38, 0x3e, 0x41, 0x46, + 0x57, 0x3a, 0x44, 0x48, 0x47, 0x45, 0x47, 0x3e, 0x43, 0x42, 0x45, 0x3b, + 0x50, 0x39, 0x37, 0x3f, 0x47, 0x51, 0x5e, 0x22, 0x59, 0x33, 0x3c, 0x37, + 0x43, 0x50, 0x49, 0x47, 0x46, 0x42, 0x39, 0x44, 0x44, 0x3d, 0x2f, 0x53, + 0x35, 0x41, 0x40, 0x3d, 0x2d, 0x35, 0x2f, 0x3e, 0x3f, 0x37, 0x38, 0x3e, + 0x30, 0x45, 0x46, 0x38, 0x33, 0x3c, 0x3e, 0x3b, 0x44, 0x42, 0x47, 0x49, + 0x43, 0x40, 0x3d, 0x3c, 0x38, 0x43, 0x3e, 0x38, 0x3d, 0x40, 0x36, 0x43, + 0x43, 0x3e, 0x40, 0x3c, 0x44, 0x47, 0x43, 0x3d, 0x41, 0x39, 0x3e, 0x45, + 0x39, 0x3d, 0x39, 0x40, 0x42, 0x40, 0x3b, 0x4a, 0x40, 0x41, 0x3f, 0x37, + 0x43, 0x41, 0x37, 0x4c, 0x3f, 0x3d, 0x38, 0x3a, 0x42, 0x46, 0x43, 0x4d, + 0x3c, 0x3a, 0x43, 0x3e, 0x3b, 0x3d, 0x46, 0x4a, 0x38, 0x3d, 0x3d, 0x39, + 0x3e, 0x3c, 0x3b, 0x3e, 0x3a, 0x40, 0x40, 0x34, 0x41, 0x3f, 0x3e, 0x3f, + 0x47, 0x3c, 0x32, 0x3a, 0x3c, 0x44, 0x3f, 0x42, 0x41, 0x43, 0x3e, 0x3a, + 0x3b, 0x42, 0x41, 0x39, 0x39, 0x37, 0x39, 0x3e, 0x3d, 0x33, 0x3e, 0x35, + 0x44, 0x37, 0x40, 0x35, 0x3f, 0x47, 0x37, 0x41, 0x35, 0x38, 0x47, 0x40, + 0x43, 0x44, 0x2e, 0x48, 0x35, 0x44, 0x41, 0x3c, 0x47, 0x3d, 0x3d, 0x52, + 0x48, 0x41, 0x44, 0x41, 0x42, 0x4b, 0x3e, 0x3d, 0x4e, 0x32, 0x34, 0x47, + 0x55, 0x57, 0x5f, 0x22, 0x57, 0x33, 0x40, 0x37, 0x40, 0x4a, 0x4d, 0x47, + 0x48, 0x38, 0x3e, 0x46, 0x37, 0x42, 0x28, 0x57, 0x38, 0x42, 0x36, 0x43, + 0x35, 
0x37, 0x39, 0x39, 0x42, 0x39, 0x38, 0x3c, 0x35, 0x3c, 0x3c, 0x3a, + 0x3c, 0x4c, 0x45, 0x3f, 0x43, 0x3d, 0x45, 0x45, 0x40, 0x47, 0x3e, 0x3e, + 0x3d, 0x4b, 0x49, 0x35, 0x43, 0x3c, 0x36, 0x46, 0x3c, 0x46, 0x42, 0x44, + 0x3c, 0x42, 0x3d, 0x42, 0x44, 0x3c, 0x4a, 0x40, 0x40, 0x3c, 0x3b, 0x3c, + 0x35, 0x34, 0x2e, 0x46, 0x38, 0x3d, 0x38, 0x44, 0x41, 0x40, 0x3c, 0x52, + 0x3b, 0x3d, 0x3b, 0x3f, 0x42, 0x47, 0x44, 0x52, 0x44, 0x44, 0x39, 0x3f, + 0x43, 0x35, 0x3c, 0x4d, 0x39, 0x3d, 0x3b, 0x37, 0x3e, 0x38, 0x3e, 0x49, + 0x3a, 0x37, 0x3c, 0x49, 0x40, 0x41, 0x3c, 0x40, 0x3d, 0x38, 0x39, 0x3f, + 0x44, 0x3e, 0x42, 0x3e, 0x47, 0x40, 0x34, 0x46, 0x48, 0x37, 0x45, 0x3e, + 0x46, 0x3f, 0x35, 0x39, 0x38, 0x3f, 0x36, 0x2c, 0x40, 0x38, 0x3e, 0x3c, + 0x32, 0x3c, 0x46, 0x3a, 0x3f, 0x41, 0x36, 0x49, 0x42, 0x38, 0x36, 0x43, + 0x3d, 0x41, 0x46, 0x35, 0x4f, 0x3a, 0x41, 0x5c, 0x4a, 0x42, 0x4e, 0x42, + 0x46, 0x54, 0x3f, 0x45, 0x4c, 0x30, 0x33, 0x44, 0x56, 0x5d, 0x68, 0x26, + 0x60, 0x33, 0x3e, 0x3a, 0x42, 0x49, 0x52, 0x47, 0x51, 0x46, 0x40, 0x47, + 0x41, 0x3b, 0x1b, 0x4f, 0x3c, 0x45, 0x3d, 0x3d, 0x32, 0x2f, 0x3e, 0x3c, + 0x3c, 0x3f, 0x3b, 0x3c, 0x2c, 0x3a, 0x41, 0x3c, 0x35, 0x3e, 0x3e, 0x3c, + 0x3d, 0x3f, 0x3e, 0x40, 0x40, 0x44, 0x42, 0x3c, 0x3c, 0x3c, 0x41, 0x3c, + 0x3c, 0x3d, 0x3e, 0x3d, 0x3c, 0x3d, 0x4a, 0x46, 0x3f, 0x35, 0x33, 0x43, + 0x42, 0x41, 0x4d, 0x48, 0x48, 0x44, 0x3e, 0x41, 0x41, 0x36, 0x3c, 0x4c, + 0x34, 0x47, 0x42, 0x39, 0x3e, 0x43, 0x3a, 0x53, 0x3b, 0x3b, 0x42, 0x3d, + 0x41, 0x3c, 0x3e, 0x52, 0x3a, 0x44, 0x34, 0x43, 0x3d, 0x3d, 0x3a, 0x50, + 0x3e, 0x33, 0x41, 0x40, 0x3f, 0x38, 0x43, 0x42, 0x3b, 0x37, 0x3e, 0x43, + 0x3f, 0x3c, 0x41, 0x49, 0x40, 0x32, 0x40, 0x3e, 0x3b, 0x3e, 0x44, 0x3c, + 0x35, 0x37, 0x3d, 0x41, 0x34, 0x3f, 0x3a, 0x3c, 0x47, 0x32, 0x41, 0x3d, + 0x3c, 0x3a, 0x4a, 0x31, 0x43, 0x38, 0x45, 0x37, 0x49, 0x3c, 0x34, 0x3f, + 0x3d, 0x3d, 0x3d, 0x45, 0x47, 0x3e, 0x37, 0x48, 0x40, 0x3b, 0x45, 0x3d, + 0x4e, 0x42, 0x3f, 0x57, 0x4b, 0x43, 0x4b, 0x3d, 0x3f, 0x47, 0x4a, 0x43, + 0x4e, 
0x30, 0x38, 0x45, 0x59, 0x60, 0x64, 0x2d, 0x5a, 0x2d, 0x34, 0x35, + 0x47, 0x54, 0x4e, 0x3f, 0x44, 0x45, 0x3c, 0x43, 0x3d, 0x40, 0x1c, 0x5a, + 0x36, 0x3f, 0x3a, 0x39, 0x37, 0x3c, 0x32, 0x3b, 0x2d, 0x4a, 0x42, 0x35, + 0x30, 0x41, 0x43, 0x3d, 0x3d, 0x45, 0x38, 0x36, 0x3e, 0x40, 0x3a, 0x4a, + 0x34, 0x3d, 0x44, 0x3c, 0x39, 0x3b, 0x52, 0x38, 0x40, 0x3b, 0x3f, 0x3f, + 0x35, 0x37, 0x46, 0x48, 0x38, 0x3b, 0x40, 0x36, 0x3d, 0x3a, 0x4f, 0x45, + 0x35, 0x3a, 0x35, 0x33, 0x37, 0x43, 0x42, 0x52, 0x37, 0x3b, 0x3d, 0x42, + 0x44, 0x3d, 0x48, 0x58, 0x33, 0x3f, 0x41, 0x44, 0x44, 0x3f, 0x3b, 0x52, + 0x47, 0x39, 0x32, 0x3b, 0x38, 0x35, 0x48, 0x50, 0x34, 0x30, 0x39, 0x43, + 0x42, 0x40, 0x3b, 0x4b, 0x43, 0x3d, 0x34, 0x44, 0x33, 0x39, 0x44, 0x4b, + 0x45, 0x3e, 0x3c, 0x3f, 0x3a, 0x3e, 0x3c, 0x45, 0x36, 0x3e, 0x3d, 0x40, + 0x43, 0x46, 0x37, 0x3d, 0x3b, 0x42, 0x43, 0x3f, 0x3a, 0x41, 0x48, 0x2f, + 0x3e, 0x39, 0x3a, 0x39, 0x3f, 0x3a, 0x41, 0x40, 0x40, 0x3c, 0x3b, 0x3b, + 0x3f, 0x40, 0x3e, 0x42, 0x38, 0x3f, 0x38, 0x3c, 0x49, 0x45, 0x3f, 0x62, + 0x55, 0x47, 0x4c, 0x3c, 0x3c, 0x4a, 0x4c, 0x46, 0x4f, 0x39, 0x3a, 0x3b, + 0x5e, 0x58, 0x6f, 0x2b, 0x5a, 0x2f, 0x3a, 0x35, 0x4b, 0x47, 0x4a, 0x46, + 0x45, 0x3e, 0x38, 0x4f, 0x3b, 0x3d, 0x21, 0x4b, 0x3d, 0x40, 0x37, 0x40, + 0x2d, 0x2c, 0x43, 0x3f, 0x2b, 0x3e, 0x3d, 0x39, 0x2f, 0x39, 0x44, 0x3c, + 0x39, 0x39, 0x43, 0x3b, 0x3d, 0x3b, 0x44, 0x39, 0x42, 0x42, 0x3e, 0x40, + 0x3b, 0x42, 0x53, 0x40, 0x32, 0x3d, 0x35, 0x3f, 0x3d, 0x45, 0x48, 0x46, + 0x3d, 0x43, 0x3c, 0x36, 0x35, 0x39, 0x3d, 0x4a, 0x39, 0x39, 0x3e, 0x41, + 0x38, 0x36, 0x3b, 0x53, 0x3c, 0x36, 0x32, 0x3b, 0x43, 0x3d, 0x42, 0x57, + 0x35, 0x2f, 0x38, 0x40, 0x2f, 0x3d, 0x3c, 0x4c, 0x40, 0x2f, 0x3a, 0x36, + 0x39, 0x3c, 0x3a, 0x51, 0x3d, 0x37, 0x39, 0x3c, 0x42, 0x40, 0x43, 0x52, + 0x3e, 0x42, 0x3e, 0x45, 0x36, 0x34, 0x42, 0x4b, 0x3a, 0x38, 0x37, 0x3f, + 0x36, 0x41, 0x3a, 0x45, 0x3e, 0x38, 0x35, 0x41, 0x35, 0x34, 0x37, 0x3c, + 0x3f, 0x31, 0x3c, 0x35, 0x33, 0x43, 0x36, 0x28, 0x44, 0x42, 0x3e, 0x42, + 0x3a, 
0x41, 0x43, 0x35, 0x3d, 0x3f, 0x40, 0x3e, 0x3d, 0x33, 0x31, 0x41, + 0x3d, 0x40, 0x3b, 0x40, 0x51, 0x40, 0x3f, 0xfb, 0x51, 0x49, 0x4c, 0x3d, + 0x44, 0x4e, 0x47, 0x42, 0x50, 0x39, 0x39, 0x40, 0x59, 0x5d, 0x70, 0x2c, + 0x59, 0x39, 0x38, 0x2f, 0x46, 0x50, 0x51, 0x47, 0x4c, 0x3c, 0x39, 0x48, + 0x44, 0x3a, 0x1a, 0x51, 0x35, 0x3e, 0x34, 0x3a, 0x3d, 0x2b, 0x41, 0x39, + 0x37, 0x4d, 0x3e, 0x43, 0x38, 0x3b, 0x3a, 0x35, 0x36, 0x3a, 0x43, 0x39, + 0x39, 0x3a, 0x46, 0x3b, 0x39, 0x3c, 0x46, 0x36, 0x3e, 0x3d, 0x4b, 0x3d, + 0x3b, 0x46, 0x3a, 0x41, 0x31, 0x3c, 0x44, 0x4a, 0x37, 0x42, 0x39, 0x43, + 0x43, 0x3e, 0x40, 0x47, 0x3c, 0x3e, 0x3b, 0x43, 0x34, 0x3a, 0x43, 0x53, + 0x3f, 0x37, 0x39, 0x37, 0x3e, 0x3b, 0x46, 0x59, 0x37, 0x37, 0x33, 0x3d, + 0x38, 0x42, 0x36, 0x58, 0x2e, 0x32, 0x2b, 0x45, 0x32, 0x33, 0x36, 0x50, + 0x41, 0x3f, 0x37, 0x3d, 0x3f, 0x3d, 0x46, 0x49, 0x41, 0x38, 0x33, 0x3d, + 0x33, 0x32, 0x3a, 0x49, 0x41, 0x41, 0x3d, 0x33, 0x3b, 0x3b, 0x3a, 0x46, + 0x34, 0x44, 0x3f, 0x3b, 0x2f, 0x3f, 0x32, 0x3c, 0x3f, 0x43, 0x3e, 0x45, + 0x3a, 0x3c, 0x43, 0x26, 0x46, 0x37, 0x38, 0x3e, 0x36, 0x31, 0x3e, 0x34, + 0x39, 0x3a, 0x38, 0x42, 0x38, 0x3e, 0x32, 0x42, 0x37, 0x37, 0x3c, 0x3a, + 0x48, 0x44, 0x3a, 0x68, 0x56, 0x46, 0x4d, 0x47, 0x40, 0x4e, 0x42, 0x46, + 0x51, 0x40, 0x38, 0x43, 0x58, 0x5d, 0x6a, 0x31, 0x57, 0x32, 0x3c, 0x36, + 0x49, 0x56, 0x52, 0x48, 0x4b, 0x41, 0x2f, 0x4d, 0x31, 0x43, 0x1b, 0x4c, + 0x30, 0x44, 0x33, 0x36, 0x2c, 0x3d, 0x45, 0x3a, 0x35, 0x46, 0x3d, 0x39, + 0x2e, 0x38, 0x3f, 0x37, 0x41, 0x44, 0x46, 0x31, 0x33, 0x46, 0x37, 0x37, + 0x3f, 0x41, 0x45, 0x30, 0x46, 0x3b, 0x50, 0x3b, 0x40, 0x39, 0x42, 0x43, + 0x35, 0x37, 0x40, 0x44, 0x3b, 0x41, 0x3d, 0x37, 0x3a, 0x41, 0x3d, 0x46, + 0x36, 0x41, 0x38, 0x41, 0x38, 0x3d, 0x45, 0x58, 0x3d, 0x3a, 0x3d, 0x44, + 0x45, 0x38, 0x48, 0x5c, 0x3d, 0x39, 0x43, 0x45, 0x41, 0x3e, 0x4a, 0x56, + 0x40, 0x33, 0x30, 0x31, 0x42, 0x39, 0x38, 0x56, 0x30, 0x3a, 0x35, 0x3e, + 0x3f, 0x38, 0x36, 0x47, 0x3c, 0x3a, 0x3d, 0x3f, 0x37, 0x35, 0x3b, 0x4d, + 0x43, 
0x36, 0x39, 0x37, 0x3e, 0x42, 0x3d, 0x3f, 0x40, 0x3f, 0x34, 0x3b, + 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a, 0x3a, 0x3c, 0x34, 0x3f, 0x3c, 0x2a, + 0x49, 0x3b, 0x36, 0x3c, 0x35, 0x46, 0x38, 0x3b, 0x3c, 0x39, 0x38, 0x42, + 0x39, 0x36, 0x2e, 0x4a, 0x3d, 0x39, 0x3f, 0x3f, 0x4b, 0x45, 0x3e, 0x67, + 0x4b, 0x4b, 0x49, 0x3e, 0x3f, 0x53, 0x4c, 0x55, 0x47, 0x32, 0x3b, 0x39, + 0x54, 0x5b, 0x6f, 0x29, 0x5a, 0x34, 0x3e, 0x26, 0x45, 0x52, 0x59, 0x44, + 0x59, 0x39, 0x3c, 0x47, 0x36, 0x46, 0x16, 0x50, 0x32, 0x46, 0x34, 0x35, + 0x35, 0x2d, 0x39, 0x38, 0x2c, 0x42, 0x43, 0x3b, 0x32, 0x3f, 0x37, 0x2f, + 0x34, 0x43, 0x46, 0x3b, 0x3b, 0x41, 0x3c, 0x37, 0x3e, 0x43, 0x4b, 0x36, + 0x3e, 0x3c, 0x4c, 0x42, 0x40, 0x3f, 0x49, 0x40, 0x3c, 0x40, 0x3c, 0x48, + 0x35, 0x42, 0x3f, 0x42, 0x44, 0x40, 0x45, 0x4f, 0x3f, 0x3f, 0x40, 0x42, + 0x3b, 0x3d, 0x49, 0x55, 0x42, 0x39, 0x41, 0x3b, 0x3f, 0x38, 0x44, 0x60, + 0x34, 0x40, 0x3b, 0x3b, 0x35, 0x3d, 0x41, 0x4e, 0x35, 0x33, 0x30, 0x3a, + 0x3a, 0x32, 0x42, 0x4f, 0x33, 0x34, 0x2f, 0x38, 0x49, 0x38, 0x40, 0x4c, + 0x35, 0x38, 0x3e, 0x46, 0x3f, 0x3a, 0x3a, 0x45, 0x3b, 0x34, 0x2e, 0x39, + 0x32, 0x3e, 0x40, 0x48, 0x35, 0x44, 0x3a, 0x34, 0x3f, 0x35, 0x3b, 0x32, + 0x40, 0x43, 0x3e, 0x38, 0x3b, 0x43, 0x3c, 0x2b, 0x46, 0x43, 0x40, 0x32, + 0x42, 0x3b, 0x49, 0x2e, 0x3b, 0x3a, 0x3e, 0x41, 0x3c, 0x3f, 0x31, 0x3b, + 0x41, 0x33, 0x41, 0x3c, 0x4d, 0x40, 0x38, 0x68, 0x4c, 0x4c, 0x4e, 0x3f, + 0x3f, 0x54, 0x4a, 0x3d, 0x4c, 0x33, 0x3b, 0x3a, 0x5d, 0x60, 0x71, 0x2b, + 0x59, 0x33, 0x3c, 0x2c, 0x47, 0x52, 0x4f, 0x51, 0x56, 0x3d, 0x39, 0x44, + 0x35, 0x41, 0x1b, 0x4a, 0x35, 0x41, 0x37, 0x35, 0x2c, 0x35, 0x37, 0x35, + 0x38, 0x41, 0x38, 0x3e, 0x3c, 0x40, 0x3c, 0x2f, 0x38, 0x3e, 0x3f, 0x45, + 0x40, 0x3d, 0x3c, 0x35, 0x3c, 0x46, 0x43, 0x39, 0x37, 0x42, 0x4e, 0x3c, + 0x42, 0x46, 0x37, 0x33, 0x43, 0x3f, 0x47, 0x4a, 0x3d, 0x3e, 0x40, 0x40, + 0x40, 0x3f, 0x4b, 0x54, 0x36, 0x3f, 0x37, 0x40, 0x39, 0x39, 0x47, 0x51, + 0x3d, 0x39, 0x36, 0x36, 0x40, 0x40, 0x41, 0x5a, 0x38, 0x39, 0x42, 0x38, + 0x40, 
0x39, 0x43, 0x50, 0x3a, 0x3a, 0x32, 0x3c, 0x3c, 0x35, 0x44, 0x4a, + 0x37, 0x35, 0x36, 0x3c, 0x35, 0x30, 0x48, 0x4b, 0x3c, 0x33, 0x37, 0x3e, + 0x42, 0x3c, 0x42, 0x4e, 0x41, 0x32, 0x3e, 0x33, 0x49, 0x39, 0x3e, 0x42, + 0x3d, 0x39, 0x37, 0x36, 0x35, 0x41, 0x3e, 0x37, 0x37, 0x3e, 0x3d, 0x38, + 0x3a, 0x3c, 0x41, 0x29, 0x3c, 0x3b, 0x39, 0x40, 0x43, 0x3d, 0x3e, 0x33, + 0x3f, 0x3f, 0x3e, 0x43, 0x43, 0x38, 0x38, 0x41, 0x3b, 0x38, 0x35, 0x3a, + 0x4b, 0x44, 0x44, 0x55, 0x4e, 0x44, 0x4d, 0x49, 0x3e, 0x53, 0x45, 0x3f, + 0x45, 0x3d, 0x36, 0x36, 0x4f, 0x5b, 0x6b, 0x28, 0x59, 0x34, 0x39, 0x34, + 0x4f, 0x4d, 0x52, 0x3e, 0x51, 0x34, 0x35, 0x4a, 0x3b, 0x3f, 0x21, 0x45, + 0x36, 0x3f, 0x38, 0x33, 0x2c, 0x37, 0x32, 0x2f, 0x2b, 0x44, 0x47, 0x3f, + 0x38, 0x3a, 0x3f, 0x2e, 0x41, 0x3f, 0x3d, 0x41, 0x35, 0x48, 0x43, 0x40, + 0x33, 0x44, 0x40, 0x38, 0x47, 0x44, 0x4c, 0x3d, 0x41, 0x3b, 0x39, 0x36, + 0x3e, 0x44, 0x49, 0x48, 0x3c, 0x3b, 0x34, 0x34, 0x3f, 0x3c, 0x42, 0x52, + 0x43, 0x41, 0x3c, 0x3c, 0x3d, 0x43, 0x48, 0x54, 0x39, 0x35, 0x39, 0x3c, + 0x43, 0x3c, 0x44, 0x5f, 0x39, 0x3d, 0x38, 0x3f, 0x36, 0x3d, 0x43, 0x58, + 0x33, 0x3d, 0x43, 0x33, 0x3f, 0x36, 0x39, 0x54, 0x3a, 0x37, 0x2d, 0x46, + 0x43, 0x41, 0x47, 0x46, 0x3e, 0x42, 0x34, 0x49, 0x3a, 0x3f, 0x38, 0x50, + 0x3a, 0x3b, 0x42, 0x3a, 0x3e, 0x3c, 0x3b, 0x40, 0x42, 0x45, 0x37, 0x3b, + 0x2f, 0x3b, 0x46, 0x30, 0x42, 0x3b, 0x3b, 0x44, 0x3b, 0x3e, 0x40, 0x1e, + 0x33, 0x40, 0x40, 0x3d, 0x39, 0x3a, 0x41, 0x33, 0x45, 0x3e, 0x3c, 0x3f, + 0x3f, 0x38, 0x31, 0x46, 0x3b, 0x35, 0x42, 0x39, 0x49, 0x3e, 0x3d, 0x66, + 0x53, 0x3f, 0x44, 0x40, 0x43, 0x45, 0x48, 0x45, 0x49, 0x2d, 0x3e, 0x3a, + 0x4f, 0x5a, 0x62, 0x27, 0x54, 0x37, 0x35, 0x34, 0x42, 0x50, 0x54, 0x43, + 0x4d, 0x38, 0x39, 0x48, 0x38, 0x4c, 0x21, 0x3f, 0x40, 0x3a, 0x3a, 0x2f, + 0x37, 0x2f, 0x29, 0x2c, 0x36, 0x47, 0x3f, 0x41, 0x31, 0x33, 0x3e, 0x32, + 0x3e, 0x40, 0x42, 0x40, 0x42, 0x3a, 0x46, 0x33, 0x44, 0x40, 0x3c, 0x43, + 0x3d, 0x41, 0x4d, 0x3d, 0x3c, 0x47, 0x46, 0x43, 0x42, 0x3e, 0x44, 0x4e, + 0x41, 
0x3a, 0x44, 0x38, 0x45, 0x3b, 0x49, 0x4c, 0x40, 0x3f, 0x37, 0x3e, + 0x3e, 0x46, 0x41, 0x51, 0x3f, 0x39, 0x30, 0x40, 0x3e, 0x38, 0x43, 0x5b, + 0x33, 0x3e, 0x31, 0x42, 0x3d, 0x2f, 0x49, 0x57, 0x37, 0x31, 0x46, 0x44, + 0x3e, 0x35, 0x40, 0x55, 0x36, 0x35, 0x3d, 0x3c, 0x38, 0x33, 0x42, 0x52, + 0x3b, 0x39, 0x34, 0x31, 0x45, 0x34, 0x3c, 0x51, 0x33, 0x39, 0x3c, 0x40, + 0x36, 0x36, 0x42, 0x3e, 0x37, 0x3e, 0x3b, 0x40, 0x3d, 0x36, 0x41, 0x30, + 0x42, 0x45, 0x40, 0x49, 0x3d, 0x32, 0x46, 0x26, 0x40, 0x44, 0x3a, 0x3f, + 0x3d, 0x46, 0x45, 0x31, 0x33, 0x34, 0x3e, 0x37, 0x46, 0x3b, 0x32, 0x3a, + 0x3d, 0x31, 0x3c, 0x36, 0x50, 0x41, 0x3b, 0x5d, 0x53, 0x42, 0x44, 0x37, + 0x3e, 0x4d, 0x41, 0x4b, 0x49, 0x2f, 0x35, 0x3a, 0x4e, 0x59, 0x5d, 0x27, + 0x5c, 0x30, 0x3d, 0x3a, 0x46, 0x50, 0x57, 0x4a, 0x4c, 0x36, 0x37, 0x46, + 0x48, 0x41, 0x24, 0x49, 0x36, 0x3e, 0x41, 0x45, 0x37, 0x38, 0x2e, 0x2e, + 0x34, 0x3c, 0x38, 0x41, 0x36, 0x3d, 0x43, 0x36, 0x3e, 0x3e, 0x41, 0x3b, + 0x42, 0x3c, 0x43, 0x38, 0x3e, 0x3d, 0x41, 0x48, 0x47, 0x4c, 0x45, 0x3b, + 0x37, 0x41, 0x38, 0x41, 0x3d, 0x41, 0x46, 0x4e, 0x36, 0x45, 0x38, 0x39, + 0x42, 0x42, 0x37, 0x4c, 0x34, 0x46, 0x3c, 0x44, 0x4a, 0x39, 0x45, 0x53, + 0x3c, 0x3f, 0x41, 0x35, 0x3c, 0x45, 0x4c, 0x5a, 0x44, 0x41, 0x30, 0x35, + 0x40, 0x39, 0x42, 0x5a, 0x36, 0x36, 0x3a, 0x3b, 0x43, 0x35, 0x3c, 0x56, + 0x35, 0x38, 0x2b, 0x4a, 0x3c, 0x40, 0x45, 0x54, 0x37, 0x37, 0x3a, 0x44, + 0x42, 0x3b, 0x3d, 0x4a, 0x3f, 0x37, 0x3b, 0x35, 0x34, 0x3f, 0x40, 0x48, + 0x45, 0x3e, 0x37, 0x38, 0x41, 0x41, 0x3d, 0x37, 0x43, 0x3d, 0x3d, 0x45, + 0x3a, 0x38, 0x3f, 0x23, 0x4a, 0x37, 0x42, 0x3c, 0x3f, 0x43, 0x42, 0x33, + 0x37, 0x39, 0x35, 0x3b, 0x41, 0x36, 0x2f, 0x3b, 0x41, 0x3a, 0x44, 0x3d, + 0x3e, 0x45, 0x44, 0x50, 0x47, 0x47, 0x48, 0x3c, 0x3f, 0x45, 0x43, 0x3f, + 0x4a, 0x33, 0x3c, 0x3a, 0x52, 0x52, 0x5a, 0x23, 0x58, 0x31, 0x3b, 0x3b, + 0x47, 0x44, 0x54, 0x45, 0x42, 0x38, 0x38, 0x40, 0x43, 0x3f, 0x2a, 0x46, + 0x3b, 0x46, 0x3b, 0x46, 0x35, 0x37, 0x29, 0x35, 0x38, 0x41, 0x3a, 0x31, + 0x44, 
0x41, 0x39, 0x36, 0x45, 0x41, 0x40, 0x3e, 0x40, 0x44, 0x47, 0x37, + 0x3f, 0x42, 0x49, 0x34, 0x46, 0x3d, 0x4b, 0x3d, 0x42, 0x3b, 0x42, 0x3e, + 0x41, 0x3b, 0x3f, 0x43, 0x47, 0x45, 0x47, 0x41, 0x40, 0x3a, 0x3d, 0x45, + 0x40, 0x36, 0x3b, 0x3b, 0x44, 0x37, 0x46, 0x55, 0x35, 0x42, 0x3f, 0x3a, + 0x41, 0x41, 0x44, 0x5c, 0x31, 0x44, 0x3d, 0x46, 0x39, 0x38, 0x46, 0x59, + 0x41, 0x3b, 0x3d, 0x39, 0x33, 0x3e, 0x41, 0x58, 0x33, 0x44, 0x34, 0x31, + 0x48, 0x3e, 0x4d, 0x56, 0x36, 0x3c, 0x37, 0x46, 0x46, 0x38, 0x45, 0x53, + 0x35, 0x3d, 0x3a, 0x31, 0x42, 0x48, 0x45, 0x44, 0x3b, 0x3b, 0x3c, 0x41, + 0x3d, 0x42, 0x3f, 0x2f, 0x38, 0x3c, 0x3e, 0x41, 0x44, 0x3a, 0x4a, 0x24, + 0x37, 0x3e, 0x37, 0x48, 0x40, 0x3f, 0x46, 0x3c, 0x47, 0x4a, 0x38, 0x47, + 0x34, 0x45, 0x31, 0x42, 0x43, 0x44, 0x3f, 0x3f, 0x49, 0x40, 0x3c, 0x41, + 0x4d, 0x43, 0x42, 0x39, 0x39, 0x48, 0x41, 0x38, 0x47, 0x3c, 0x3c, 0x42, + 0x44, 0x55, 0x62, 0x2a, 0x5c, 0x32, 0x3a, 0x37, 0x4c, 0x44, 0x4f, 0x3e, + 0x4e, 0x42, 0x3a, 0x42, 0x41, 0x4a, 0x35, 0x44, 0x45, 0x3b, 0x43, 0x41, + 0x33, 0x38, 0x28, 0x36, 0x40, 0x47, 0x3e, 0x3e, 0x3e, 0x39, 0x3a, 0x37, + 0x44, 0x44, 0x3f, 0x3b, 0x41, 0x3c, 0x45, 0x36, 0x38, 0x3a, 0x3c, 0x42, + 0x42, 0x3f, 0x59, 0x3c, 0x47, 0x3d, 0x38, 0x3a, 0x42, 0x44, 0x41, 0x46, + 0x3f, 0x43, 0x48, 0x42, 0x44, 0x35, 0x3f, 0x45, 0x36, 0x3f, 0x38, 0x3a, + 0x44, 0x3d, 0x3d, 0x4e, 0x3e, 0x45, 0x40, 0x42, 0x3c, 0x33, 0x43, 0x5a, + 0x38, 0x3e, 0x45, 0x3a, 0x3e, 0x42, 0x45, 0x52, 0x3c, 0x42, 0x3a, 0x38, + 0x3d, 0x3b, 0x4a, 0x57, 0x38, 0x37, 0x47, 0x44, 0x3e, 0x3c, 0x38, 0x48, + 0x36, 0x41, 0x3f, 0x41, 0x3a, 0x3a, 0x46, 0x47, 0x42, 0x40, 0x32, 0x33, + 0x43, 0x37, 0x41, 0x43, 0x3e, 0x40, 0x3d, 0x3a, 0x3e, 0x38, 0x42, 0x30, + 0x3e, 0x40, 0x46, 0x42, 0x40, 0x44, 0x42, 0x23, 0x31, 0x40, 0x3f, 0x3d, + 0x3b, 0x33, 0x40, 0x33, 0x41, 0x33, 0x43, 0x41, 0x3a, 0x3e, 0x36, 0x40, + 0x40, 0x45, 0x37, 0x42, 0x46, 0x42, 0x39, 0x48, 0x44, 0x40, 0x40, 0x45, + 0x3c, 0x49, 0x41, 0x3f, 0x4c, 0x3d, 0x2f, 0x3f, 0x47, 0x52, 0x54, 0x2c, + 0x55, 
0x42, 0x44, 0x3b, 0x46, 0x4f, 0x48, 0x3c, 0x45, 0x39, 0x3f, 0x4b, + 0x3f, 0x3f, 0x36, 0x42, 0x41, 0x48, 0x44, 0x44, 0x36, 0x3b, 0x37, 0x40, + 0x39, 0x49, 0x3a, 0x35, 0x3e, 0x48, 0x31, 0x30, 0x44, 0x38, 0x4c, 0x3c, + 0x41, 0x3e, 0x46, 0x32, 0x44, 0x3b, 0x42, 0x3c, 0x38, 0x3a, 0x47, 0x3f, + 0x3a, 0x42, 0x3a, 0x43, 0x40, 0x4b, 0x47, 0x3c, 0x42, 0x46, 0x45, 0x42, + 0x3c, 0x46, 0x3d, 0x3f, 0x3e, 0x36, 0x38, 0x3e, 0x46, 0x3c, 0x4d, 0x43, + 0x49, 0x41, 0x48, 0x3c, 0x3d, 0x39, 0x43, 0x58, 0x3a, 0x41, 0x3f, 0x38, + 0x37, 0x3f, 0x46, 0x5d, 0x3c, 0x3c, 0x39, 0x36, 0x3d, 0x46, 0x43, 0x50, + 0x3a, 0x47, 0x39, 0x36, 0x41, 0x3f, 0x3e, 0x51, 0x31, 0x36, 0x3e, 0x3c, + 0x3c, 0x3a, 0x48, 0x41, 0x3a, 0x43, 0x49, 0x3e, 0x42, 0x46, 0x3f, 0x41, + 0x49, 0x33, 0x42, 0x41, 0x45, 0x40, 0x3d, 0x2b, 0x3d, 0x38, 0x40, 0x37, + 0x3a, 0x31, 0x45, 0x26, 0x33, 0x3d, 0x3f, 0x39, 0x36, 0x3c, 0x38, 0x33, + 0x34, 0x3f, 0x35, 0x44, 0x3a, 0x39, 0x32, 0x41, 0x35, 0x40, 0x3c, 0x3b, + 0x4a, 0x3f, 0x3e, 0x3e, 0x4a, 0x3e, 0x42, 0x35, 0x38, 0x43, 0x3c, 0x37, + 0x3d, 0x3c, 0x39, 0x43, 0x3f, 0x4e, 0x54, 0x33, 0x4b, 0x37, 0x43, 0x3b, + 0x43, 0x48, 0x43, 0x42, 0x3d, 0x46, 0x45, 0x49, 0x3a, 0x39, 0x36, 0x4a, + 0x48, 0x48, 0x37, 0x4b, 0x42, 0x47, 0x34, 0x34, 0x43, 0x42, 0x3a, 0x3d, + 0x3c, 0x46, 0x34, 0x39, 0x40, 0x3b, 0x3e, 0x3e, 0x37, 0x3d, 0x53, 0x3b, + 0x48, 0x3c, 0x43, 0x44, 0x3b, 0x3f, 0x57, 0x3d, 0x39, 0x3c, 0x39, 0x3a, + 0x3e, 0x3f, 0x43, 0x3e, 0x41, 0x47, 0x3c, 0x41, 0x40, 0x41, 0x37, 0x3f, + 0x3b, 0x43, 0x35, 0x3e, 0x45, 0x40, 0x47, 0x59, 0x41, 0x49, 0x3b, 0x3f, + 0x47, 0x49, 0x4b, 0x61, 0x39, 0x48, 0x39, 0x3e, 0x44, 0x34, 0x3b, 0x59, + 0x3c, 0x42, 0x45, 0x35, 0x42, 0x41, 0x39, 0x52, 0x42, 0x3c, 0x3d, 0x3e, + 0x3d, 0x4a, 0x4a, 0x4d, 0x3c, 0x34, 0x44, 0x3c, 0x41, 0x34, 0x38, 0x46, + 0x38, 0x45, 0x40, 0x45, 0x40, 0x3a, 0x3d, 0x44, 0x3a, 0x37, 0x3a, 0x3a, + 0x3b, 0x42, 0x40, 0x34, 0x3b, 0x3c, 0x42, 0x40, 0x3d, 0x32, 0x40, 0x27, + 0x37, 0x39, 0x37, 0x46, 0x48, 0x31, 0x40, 0x30, 0x42, 0x42, 0x3a, 0x40, + 0x3d, 
0x37, 0x2a, 0x40, 0x41, 0x37, 0x3c, 0x4a, 0x46, 0x45, 0x3d, 0x34, + 0x48, 0x41, 0x42, 0x3e, 0x3f, 0x39, 0x3c, 0x3a, 0x4f, 0x3b, 0x32, 0x3e, + 0x43, 0x51, 0x4f, 0x2a, 0x46, 0x3a, 0x3d, 0x3b, 0x40, 0x3d, 0x4c, 0x3c, + 0x48, 0x40, 0x36, 0x4a, 0x3a, 0x38, 0x42, 0x43, 0x4c, 0x3d, 0x47, 0x47, + 0x33, 0x3f, 0x2d, 0x37, 0x4a, 0x43, 0x38, 0x3e, 0x49, 0x42, 0x42, 0x3d, + 0x43, 0x47, 0x41, 0x38, 0x46, 0x37, 0x46, 0x38, 0x47, 0x42, 0x49, 0x3d, + 0x3b, 0x37, 0x4c, 0x3c, 0x3a, 0x45, 0x3f, 0x37, 0x36, 0x3d, 0x3c, 0x40, + 0x3e, 0x45, 0x46, 0x41, 0x41, 0x40, 0x3c, 0x44, 0x47, 0x43, 0x37, 0x3f, + 0x3e, 0x3a, 0x3a, 0x4b, 0x3a, 0x36, 0x3d, 0x3f, 0x38, 0x3f, 0x3c, 0x58, + 0x40, 0x49, 0x3d, 0x42, 0x38, 0x3a, 0x47, 0x50, 0x3b, 0x49, 0x40, 0x44, + 0x3e, 0x3c, 0x38, 0x52, 0x3a, 0x3e, 0x44, 0x3c, 0x35, 0x44, 0x3a, 0x47, + 0x3e, 0x49, 0x3f, 0x47, 0x45, 0x39, 0x3b, 0x46, 0x44, 0x3e, 0x41, 0x46, + 0x40, 0x41, 0x40, 0x40, 0x3a, 0x35, 0x3e, 0x36, 0x3e, 0x3e, 0x3d, 0x35, + 0x3b, 0x3c, 0x38, 0x46, 0x3b, 0x3c, 0x41, 0x2c, 0x3f, 0x42, 0x38, 0x3b, + 0x36, 0x3b, 0x39, 0x40, 0x40, 0x38, 0x36, 0x33, 0x34, 0x42, 0x2f, 0x44, + 0x41, 0x40, 0x39, 0x35, 0x3b, 0x44, 0x42, 0x2c, 0x41, 0x3b, 0x44, 0x41, + 0x35, 0x44, 0x3b, 0x34, 0x44, 0x49, 0x36, 0x39, 0x3a, 0x52, 0x4d, 0x2b, + 0x40, 0x40, 0x3e, 0x39, 0x48, 0x42, 0x3c, 0x44, 0x46, 0x49, 0x3f, 0x54, + 0x43, 0x40, 0x2e, 0x40, 0x4f, 0x36, 0x3e, 0x3f, 0x38, 0x48, 0x44, 0x3c, + 0x44, 0x43, 0x41, 0x47, 0x40, 0x46, 0x40, 0x37, 0x41, 0x34, 0x3a, 0x41, + 0x41, 0x3b, 0x49, 0x39, 0x42, 0x38, 0x3d, 0x39, 0x34, 0x35, 0x43, 0x36, + 0x3e, 0x44, 0x3f, 0x40, 0x43, 0x40, 0x40, 0x3a, 0x47, 0x42, 0x3e, 0x42, + 0x46, 0x35, 0x3a, 0x46, 0x3c, 0x3c, 0x3c, 0x3d, 0x3f, 0x40, 0x43, 0x4c, + 0x3a, 0x37, 0x3f, 0x43, 0x47, 0x38, 0x42, 0x58, 0x42, 0x3b, 0x34, 0x37, + 0x3e, 0x48, 0x3c, 0x57, 0x44, 0x3c, 0x3d, 0x3a, 0x36, 0x48, 0x3c, 0x51, + 0x3d, 0x48, 0x45, 0x45, 0x38, 0x45, 0x40, 0x3f, 0x3b, 0x35, 0x3d, 0x3f, + 0x38, 0x47, 0x39, 0x3b, 0x36, 0x49, 0x43, 0x40, 0x3f, 0x46, 0x38, 0x40, + 0x3f, 
0x3e, 0x39, 0x32, 0x47, 0x42, 0x35, 0x33, 0x39, 0x47, 0x3c, 0x36, + 0x3b, 0x37, 0x43, 0x35, 0x3b, 0x3b, 0x34, 0x3b, 0x38, 0x3d, 0x3e, 0x3a, + 0x35, 0x49, 0x38, 0x40, 0x3f, 0x3f, 0x3e, 0x37, 0x43, 0x3b, 0x3e, 0x3e, + 0x3b, 0x40, 0x44, 0x39, 0x3d, 0x3f, 0x31, 0x42, 0x42, 0x3b, 0x41, 0x3d, + 0x3e, 0x3c, 0x37, 0x34, 0x48, 0x3d, 0x49, 0x4a, 0x47, 0x36, 0x3a, 0x34, + 0x37, 0x36, 0x3e, 0x38, 0x33, 0x45, 0x39, 0x44, 0x34, 0x49, 0x3a, 0x3d, + 0x34, 0x31, 0x31, 0x3d, 0x34, 0x3d, 0x41, 0x3e, 0x49, 0x41, 0x34, 0x3f, + 0x3a, 0x42, 0x3e, 0x40, 0x3f, 0x33, 0x46, 0x3f, 0x34, 0x39, 0x37, 0x46, + 0x3e, 0x32, 0x3f, 0x45, 0x45, 0x41, 0x3b, 0x4b, 0x35, 0x35, 0x3b, 0x4a, + 0x3d, 0x43, 0x3b, 0x44, 0x3c, 0x38, 0x31, 0x43, 0x39, 0x35, 0x41, 0x45, + 0x37, 0x3e, 0x43, 0x47, 0x39, 0x40, 0x41, 0x41, 0x40, 0x32, 0x37, 0x3e, + 0x3d, 0x39, 0x3b, 0x49, 0x33, 0x35, 0x38, 0x41, 0x45, 0x37, 0x3c, 0x49, + 0x3b, 0x34, 0x34, 0x41, 0x3a, 0x3f, 0x3e, 0x47, 0x39, 0x3c, 0x34, 0x3a, + 0x38, 0x44, 0x40, 0x51, 0x3a, 0x37, 0x3b, 0x3f, 0x3d, 0x3a, 0x45, 0x48, + 0x3f, 0x46, 0x35, 0x43, 0x38, 0x43, 0x35, 0x4c, 0x42, 0x47, 0x44, 0x3d, + 0x40, 0x3a, 0x39, 0x4e, 0x3d, 0x37, 0x3c, 0x42, 0x40, 0x48, 0x44, 0x4c, + 0x31, 0x40, 0x42, 0x3b, 0x45, 0x45, 0x3f, 0x3e, 0x3d, 0x44, 0x3f, 0x31, + 0x3f, 0x44, 0x45, 0x37, 0x3e, 0x3d, 0x35, 0x3b, 0x2d, 0x44, 0x4a, 0x3a, + 0x2b, 0x37, 0x38, 0x46, 0x41, 0x39, 0x3c, 0x3c, 0x46, 0x33, 0x36, 0x3c, + 0x4b, 0x34, 0x49, 0x50, 0x30, 0x3c, 0x33, 0x41, 0x44, 0x33, 0x43, 0x39, + 0x36, 0x45, 0x33, 0x3b, 0x3d, 0x36, 0x47, 0x30, 0x42, 0x37, 0x49, 0x3e, + 0x3b, 0x49, 0x3d, 0x3b, 0x3a, 0x41, 0x38, 0x44, 0x42, 0x3b, 0x3f, 0x40, + 0x46, 0x35, 0x38, 0x3c, 0x48, 0x3a, 0x46, 0x41, 0x36, 0x36, 0x41, 0x3e, + 0x43, 0x3e, 0x32, 0x39, 0x3a, 0x41, 0x30, 0x3e, 0x40, 0x3e, 0x36, 0x3a, + 0x45, 0x45, 0x3a, 0x3c, 0x31, 0x3b, 0x47, 0x3f, 0x36, 0x3a, 0x3c, 0x41, + 0x3b, 0x41, 0x39, 0x46, 0x3f, 0x3c, 0x34, 0x3e, 0x41, 0x45, 0x41, 0x42, + 0x39, 0x40, 0x40, 0x44, 0x45, 0x42, 0x34, 0x3f, 0x3e, 0x31, 0x3b, 0x41, + 0x33, 
0x43, 0x37, 0x44, 0x44, 0x3a, 0x36, 0x36, 0x48, 0x3c, 0x37, 0x47, + 0x39, 0x3e, 0x3e, 0x3c, 0x3c, 0x41, 0x3c, 0x44, 0x3b, 0x42, 0x3f, 0x3a, + 0x43, 0x3b, 0x3e, 0x48, 0x36, 0x3f, 0x3d, 0x34, 0x40, 0x43, 0x35, 0x4f, + 0x34, 0x39, 0x3b, 0x41, 0x40, 0x39, 0x37, 0x4c, 0x39, 0x36, 0x39, 0x39, + 0x47, 0x41, 0x43, 0x3f, 0x3f, 0x33, 0x42, 0x3f, 0x42, 0x40, 0x37, 0x40, + 0x3f, 0x34, 0x45, 0x3d, 0x2d, 0x3c, 0x44, 0x3b, 0x43, 0x37, 0x26, 0x50, + 0x43, 0x44, 0x3d, 0x43, 0x42, 0x2d, 0x3c, 0x33, 0x4a, 0x32, 0x4a, 0x53, + 0x33, 0x38, 0x27, 0x36, 0x42, 0x30, 0x47, 0x3d, 0x36, 0x45, 0x46, 0x36, + 0x3b, 0x3b, 0x40, 0x33, 0x37, 0x36, 0x44, 0x46, 0x3d, 0x35, 0x40, 0x38, + 0x3b, 0x40, 0x36, 0x3c, 0x3d, 0x37, 0x31, 0x41, 0x33, 0x3c, 0x38, 0x3f, + 0x43, 0x3a, 0x40, 0x49, 0x38, 0x39, 0x38, 0x3d, 0x43, 0x3d, 0x39, 0x3b, + 0x3d, 0x3f, 0x38, 0x42, 0x34, 0x43, 0x33, 0x3e, 0x43, 0x3e, 0x40, 0x42, + 0x3b, 0x45, 0x37, 0x44, 0x43, 0x39, 0x3c, 0x3d, 0x37, 0x44, 0x3a, 0x3b, + 0x47, 0x3f, 0x3a, 0x3c, 0x3a, 0x3b, 0x3f, 0x43, 0x3e, 0x3d, 0x46, 0x3e, + 0x37, 0x36, 0x3f, 0x40, 0x42, 0x42, 0x37, 0x36, 0x48, 0x35, 0x44, 0x44, + 0x39, 0x3c, 0x3b, 0x41, 0x44, 0x49, 0x3a, 0x40, 0x41, 0x36, 0x33, 0x3a, + 0x3c, 0x3d, 0x40, 0x3f, 0x43, 0x36, 0x3c, 0x3a, 0x3f, 0x4b, 0x32, 0x49, + 0x49, 0x3e, 0x3a, 0x3e, 0x3f, 0x41, 0x3c, 0x47, 0x40, 0x41, 0x45, 0x3e, + 0x47, 0x47, 0x3f, 0x47, 0x45, 0x3e, 0x31, 0x43, 0x4a, 0x44, 0x36, 0x40, + 0x41, 0x47, 0x3e, 0x42, 0x37, 0x40, 0x3b, 0x46, 0x37, 0x41, 0x3e, 0x3c, + 0x27, 0x40, 0x49, 0x42, 0x42, 0x39, 0x30, 0x49, 0x43, 0x38, 0x3d, 0x42, + 0x43, 0x2f, 0x3b, 0x37, 0x4b, 0x2d, 0x4f, 0x52, 0x30, 0x31, 0x2f, 0x3a, + 0x49, 0x38, 0x4f, 0x45, 0x2e, 0x47, 0x3a, 0x32, 0x33, 0x3f, 0x4a, 0x2e, + 0x33, 0x3b, 0x3e, 0x3e, 0x49, 0x45, 0x44, 0x38, 0x3c, 0x35, 0x45, 0x47, + 0x41, 0x3b, 0x3c, 0x48, 0x46, 0x39, 0x39, 0x3b, 0x3f, 0x41, 0x38, 0x42, + 0x3d, 0x46, 0x33, 0x41, 0x36, 0x3f, 0x3f, 0x3c, 0x33, 0x3e, 0x3e, 0x40, + 0x44, 0x40, 0x3c, 0x38, 0x46, 0x3a, 0x40, 0x36, 0x42, 0x35, 0x3f, 0x3b, + 0x3b, 
0x43, 0x3c, 0x40, 0x40, 0x49, 0x2e, 0x39, 0x40, 0x3f, 0x45, 0x41, + 0x3f, 0x30, 0x42, 0x3d, 0x40, 0x3c, 0x3a, 0x3b, 0x3b, 0x40, 0x39, 0x42, + 0x3a, 0x3f, 0x3f, 0x3e, 0x35, 0x3b, 0x38, 0x45, 0x47, 0x35, 0x44, 0x3e, + 0x3b, 0x3f, 0x3f, 0x40, 0x3a, 0x35, 0x30, 0x49, 0x45, 0x35, 0x3b, 0x39, + 0x3b, 0x48, 0x3f, 0x37, 0x39, 0x40, 0x43, 0x45, 0x3d, 0x40, 0x41, 0x3a, + 0x33, 0x3d, 0x3a, 0x4b, 0x40, 0x42, 0x40, 0x42, 0x43, 0x39, 0x3c, 0x49, + 0x3e, 0x47, 0x3e, 0x44, 0x3f, 0x3a, 0x40, 0x41, 0x3f, 0x42, 0x42, 0x37, + 0x3e, 0x3b, 0x36, 0x3e, 0x3b, 0x3c, 0x48, 0x43, 0x2d, 0x46, 0x4a, 0x38, + 0x45, 0x3a, 0x29, 0x46, 0x40, 0x3c, 0x40, 0x44, 0x40, 0x33, 0x2f, 0x33, + 0x48, 0x2e, 0x51, 0x4f, 0x2b, 0x32, 0x2e, 0x2d, 0x45, 0x33, 0x4d, 0x41, + 0x29, 0x4b, 0x41, 0x39, 0x2f, 0x3a, 0x49, 0x31, 0x37, 0x40, 0x47, 0x4c, + 0x3e, 0x31, 0x41, 0x3f, 0x43, 0x37, 0x45, 0x4f, 0x41, 0x3c, 0x30, 0x4a, + 0x37, 0x37, 0x36, 0x39, 0x31, 0x3d, 0x36, 0x4b, 0x37, 0x44, 0x3c, 0x43, + 0x44, 0x36, 0x3f, 0x3b, 0x34, 0x3e, 0x3a, 0x35, 0x38, 0x3f, 0x33, 0x37, + 0x3b, 0x3d, 0x46, 0x38, 0x3b, 0x37, 0x38, 0x3b, 0x31, 0x3e, 0x3d, 0x3b, + 0x3d, 0x39, 0x35, 0x33, 0x33, 0x3c, 0x39, 0x39, 0x48, 0x39, 0x39, 0x3f, + 0x3e, 0x36, 0x47, 0x3a, 0x44, 0x40, 0x32, 0x3c, 0x37, 0x35, 0x40, 0x3f, + 0x3a, 0x38, 0x3b, 0x3d, 0x46, 0x45, 0x36, 0x43, 0x40, 0x3d, 0x41, 0x41, + 0x47, 0x3a, 0x3d, 0x3e, 0x43, 0x42, 0x32, 0x36, 0x41, 0x37, 0x3b, 0x35, + 0x36, 0x44, 0x36, 0x3c, 0x43, 0x32, 0x3e, 0x3e, 0x42, 0x45, 0x32, 0x3c, + 0x3a, 0x3b, 0x35, 0x43, 0x41, 0x3d, 0x44, 0x50, 0x43, 0x31, 0x3e, 0x44, + 0x44, 0x41, 0x3a, 0x44, 0x36, 0x39, 0x3b, 0x3c, 0x32, 0x38, 0x3b, 0x45, + 0x38, 0x43, 0x40, 0x42, 0x33, 0x3e, 0x4a, 0x42, 0x45, 0x39, 0x2f, 0x42, + 0x39, 0x35, 0x44, 0x3e, 0x39, 0x2f, 0x34, 0x33, 0x49, 0x29, 0x50, 0x4f, + 0x2b, 0x36, 0x34, 0x2d, 0x47, 0x33, 0x49, 0x3c, 0x33, 0x51, 0x49, 0x3f, + 0x34, 0x39, 0x4a, 0x2c, 0x34, 0x45, 0x4f, 0x47, 0x34, 0x42, 0x3a, 0x3d, + 0x36, 0x4a, 0x3b, 0x43, 0x36, 0x3f, 0x39, 0x4b, 0x38, 0x3a, 0x31, 0x3d, + 0x32, 
0x42, 0x3a, 0x47, 0x48, 0x3e, 0x44, 0x3f, 0x39, 0x3e, 0x44, 0x35, + 0x41, 0x3c, 0x45, 0x3a, 0x3e, 0x3b, 0x3d, 0x2f, 0x37, 0x40, 0x3e, 0x43, + 0x39, 0x39, 0x33, 0x3b, 0x37, 0x3b, 0x37, 0x37, 0x37, 0x39, 0x36, 0x31, + 0x39, 0x3b, 0x41, 0x39, 0x3b, 0x40, 0x36, 0x37, 0x42, 0x39, 0x3a, 0x46, + 0x3f, 0x30, 0x38, 0x39, 0x35, 0x32, 0x3e, 0x3a, 0x43, 0x43, 0x3e, 0x33, + 0x42, 0x3f, 0x41, 0x3c, 0x46, 0x34, 0x34, 0x40, 0x43, 0x37, 0x32, 0x43, + 0x3c, 0x37, 0x36, 0x33, 0x3d, 0x36, 0x3a, 0x40, 0x39, 0x38, 0x32, 0x3e, + 0x32, 0x3d, 0x37, 0x49, 0x42, 0x47, 0x41, 0x3b, 0x3d, 0x3c, 0x3a, 0x37, + 0x3c, 0x45, 0x3a, 0x45, 0x36, 0x44, 0x3a, 0x3a, 0x3a, 0x3c, 0x43, 0x3b, + 0x3b, 0x35, 0x38, 0x47, 0x36, 0x40, 0x32, 0x43, 0x3e, 0x39, 0x42, 0x40, + 0x2c, 0x3c, 0x4c, 0x4c, 0x43, 0x3b, 0x37, 0x4a, 0x3f, 0x3c, 0x45, 0x44, + 0x3f, 0x30, 0x36, 0x31, 0x4f, 0x2f, 0x5d, 0x4b, 0x34, 0x34, 0x2d, 0x2b, + 0x44, 0x31, 0x4e, 0x40, 0x2e, 0x4d, 0x48, 0x3e, 0x37, 0x2b, 0x49, 0x25, + 0x31, 0x49, 0x44, 0x49, 0x39, 0x39, 0x4b, 0x3a, 0x3a, 0x41, 0x3e, 0x42, + 0x3c, 0x36, 0x36, 0x4a, 0x32, 0x44, 0x3e, 0x48, 0x3e, 0x3c, 0x37, 0x49, + 0x3d, 0x34, 0x3f, 0x37, 0x33, 0x36, 0x46, 0x3a, 0x3a, 0x31, 0x45, 0x3f, + 0x3a, 0x31, 0x3b, 0x33, 0x41, 0x42, 0x35, 0x39, 0x38, 0x44, 0x36, 0x3a, + 0x3f, 0x3b, 0x37, 0x3e, 0x3b, 0x38, 0x2f, 0x32, 0x44, 0x3d, 0x44, 0x41, + 0x39, 0x36, 0x3a, 0x34, 0x39, 0x38, 0x34, 0x3f, 0x3b, 0x37, 0x34, 0x34, + 0x40, 0x3d, 0x34, 0x3a, 0x46, 0x42, 0x3f, 0x34, 0x38, 0x33, 0x39, 0x44, + 0x3f, 0x41, 0x3c, 0x31, 0x40, 0x32, 0x3f, 0x37, 0x37, 0x41, 0x3e, 0x35, + 0x37, 0x48, 0x3b, 0x41, 0x3d, 0x3a, 0x3f, 0x35, 0x33, 0x3c, 0x36, 0x3b, + 0x3a, 0x48, 0x33, 0x42, 0x37, 0x33, 0x39, 0x41, 0x3c, 0x3d, 0x3b, 0x4d, + 0x39, 0x3a, 0x3e, 0x44, 0x3d, 0x41, 0x3b, 0x38, 0x49, 0x41, 0x3a, 0x38, + 0x34, 0x38, 0x38, 0x3c, 0x45, 0x3c, 0x37, 0x3b, 0x36, 0x3e, 0x4a, 0x4b, + 0x42, 0x3f, 0x32, 0x45, 0x46, 0x35, 0x46, 0x41, 0x38, 0x33, 0x39, 0x37, + 0x44, 0x2b, 0x60, 0x4a, 0x2a, 0x2e, 0x35, 0x2d, 0x43, 0x37, 0x51, 0x47, + 0x2f, 
0x4d, 0x50, 0x3e, 0x3a, 0x33, 0x4f, 0x2a, 0x35, 0x45, 0x4a, 0x4c, + 0x3b, 0x3d, 0x43, 0x44, 0x3d, 0x3f, 0x4a, 0x3e, 0x49, 0x37, 0x2e, 0x4f, + 0x39, 0x3f, 0x32, 0x3c, 0x37, 0x3b, 0x39, 0x4d, 0x34, 0x3f, 0x46, 0x44, + 0x3d, 0x40, 0x3f, 0x40, 0x39, 0x33, 0x39, 0x3e, 0x3d, 0x40, 0x31, 0x30, + 0x35, 0x3d, 0x3e, 0x3a, 0x3e, 0x32, 0x31, 0x3e, 0x48, 0x3c, 0x40, 0x43, + 0x3f, 0x3f, 0x34, 0x2e, 0x3a, 0x3e, 0x3b, 0x43, 0x45, 0x32, 0x3a, 0x31, + 0x37, 0x38, 0x31, 0x35, 0x34, 0x3d, 0x42, 0x36, 0x46, 0x37, 0x32, 0x47, + 0x41, 0x3c, 0x35, 0x35, 0x36, 0x41, 0x3a, 0x3b, 0x42, 0x44, 0x36, 0x31, + 0x3c, 0x3d, 0x34, 0x34, 0x3b, 0x40, 0x40, 0x2e, 0x40, 0x46, 0x3b, 0x43, + 0x3f, 0x40, 0x3b, 0x3a, 0x32, 0x40, 0x46, 0x39, 0x3c, 0x49, 0x2f, 0x3d, + 0x49, 0x3e, 0x44, 0x3c, 0x3e, 0x35, 0x3f, 0x44, 0x41, 0x40, 0x3e, 0x47, + 0x3d, 0x40, 0x3f, 0x41, 0x3b, 0x41, 0x41, 0x3f, 0x40, 0x3f, 0x3e, 0x3e, + 0x3f, 0x43, 0x35, 0x40, 0x2b, 0x42, 0x45, 0x56, 0x40, 0x3c, 0x2f, 0x44, + 0x44, 0x3d, 0x3e, 0x3d, 0x40, 0x2d, 0x39, 0x31, 0x54, 0x2f, 0x61, 0x48, + 0x2e, 0x37, 0x37, 0x32, 0x3e, 0x2d, 0x52, 0x4d, 0x2d, 0x4d, 0x4c, 0x3a, + 0x3a, 0x31, 0x4e, 0x2d, 0x31, 0x48, 0x47, 0x54, 0x45, 0x38, 0x3b, 0x3d, + 0x42, 0x41, 0x44, 0x4a, 0x48, 0x42, 0x2f, 0x4d, 0x31, 0x34, 0x3a, 0x46, + 0x37, 0x44, 0x2c, 0x45, 0x46, 0x43, 0x40, 0x3f, 0x34, 0x33, 0x40, 0x39, + 0x32, 0x35, 0x3a, 0x40, 0x3f, 0x3f, 0x36, 0x32, 0x3f, 0x3d, 0x35, 0x48, + 0x3c, 0x48, 0x37, 0x39, 0x35, 0x3f, 0x37, 0x3d, 0x44, 0x46, 0x2d, 0x2a, + 0x47, 0x38, 0x3a, 0x39, 0x45, 0x3b, 0x40, 0x2d, 0x37, 0x33, 0x41, 0x3c, + 0x40, 0x35, 0x3f, 0x32, 0x3a, 0x36, 0x40, 0x41, 0x3a, 0x3c, 0x33, 0x31, + 0x42, 0x3f, 0x41, 0x3a, 0x41, 0x46, 0x38, 0x2f, 0x3c, 0x3d, 0x3d, 0x39, + 0x3b, 0x46, 0x41, 0x31, 0x46, 0x36, 0x40, 0x48, 0x3c, 0x33, 0x42, 0x32, + 0x3b, 0x40, 0x3f, 0x36, 0x37, 0x44, 0x34, 0x35, 0x32, 0x32, 0x37, 0x38, + 0x33, 0x3b, 0x37, 0x4a, 0x3f, 0x46, 0x3a, 0x41, 0x32, 0x37, 0x30, 0x3e, + 0x40, 0x35, 0x41, 0x40, 0x37, 0x41, 0x2b, 0x40, 0x3d, 0x3d, 0x32, 0x38, + 0x34, 
0x3e, 0x47, 0x61, 0x43, 0x3b, 0x3c, 0x42, 0x46, 0x3d, 0x40, 0x4a, + 0x3c, 0x2d, 0x33, 0x35, 0x55, 0x38, 0x69, 0x4f, 0x33, 0x37, 0x30, 0x39, + 0x44, 0x2e, 0x58, 0x4b, 0x2a, 0x51, 0x4b, 0x3c, 0x39, 0x2e, 0x51, 0x2d, + 0x30, 0x4a, 0x42, 0x53, 0x3f, 0x39, 0x3e, 0x44, 0x3b, 0x40, 0x47, 0x44, + 0x47, 0x3e, 0x39, 0x4b, 0x40, 0x3d, 0x42, 0x39, 0x3b, 0x39, 0x32, 0x42, + 0x36, 0x36, 0x36, 0x42, 0x44, 0x34, 0x33, 0x40, 0x40, 0x40, 0x3a, 0x3a, + 0x41, 0x3f, 0x31, 0x30, 0x3f, 0x31, 0x30, 0x39, 0x46, 0x36, 0x35, 0x34, + 0x40, 0x43, 0x3c, 0x41, 0x31, 0x46, 0x35, 0x26, 0x44, 0x32, 0x3d, 0x35, + 0x3d, 0x3c, 0x36, 0x32, 0x39, 0x3a, 0x30, 0x40, 0x48, 0x3e, 0x38, 0x37, + 0x44, 0x3b, 0x3d, 0x42, 0x3d, 0x3c, 0x32, 0x2b, 0x3f, 0x41, 0x39, 0x3d, + 0x3e, 0x3f, 0x35, 0x2f, 0x46, 0x3d, 0x3d, 0x3b, 0x45, 0x37, 0x31, 0x35, + 0x44, 0x40, 0x3a, 0x45, 0x3a, 0x3c, 0x39, 0x31, 0x3b, 0x3d, 0x3b, 0x3a, + 0x43, 0x44, 0x39, 0x47, 0x44, 0x36, 0x3e, 0x39, 0x48, 0x3f, 0x39, 0x4b, + 0x3c, 0x36, 0x3d, 0x44, 0x44, 0x3f, 0x39, 0x43, 0x3f, 0x37, 0x3f, 0x37, + 0x3b, 0x3b, 0x38, 0x3b, 0x3f, 0x40, 0x31, 0x44, 0x30, 0x44, 0x46, 0x5b, + 0x46, 0x3f, 0x39, 0x40, 0x40, 0x37, 0x4a, 0x46, 0x3f, 0x36, 0x40, 0x39, + 0x59, 0x3e, 0x66, 0x57, 0x32, 0x34, 0x2e, 0x33, 0x46, 0x31, 0x58, 0x44, + 0x26, 0x4c, 0x4b, 0x3c, 0x39, 0x2e, 0x4d, 0x35, 0x32, 0x46, 0x52, 0x52, + 0x3e, 0x40, 0x39, 0x3c, 0x39, 0x3d, 0x53, 0x48, 0x41, 0x3c, 0x3b, 0x4d, + 0x3c, 0x3e, 0x38, 0x44, 0x3a, 0x3a, 0x29, 0x4a, 0x3c, 0x37, 0x36, 0x38, + 0x3a, 0x31, 0x37, 0x39, 0x3a, 0x40, 0x46, 0x32, 0x42, 0x38, 0x32, 0x2e, + 0x3a, 0x45, 0x44, 0x34, 0x34, 0x38, 0x32, 0x2e, 0x35, 0x40, 0x3a, 0x41, + 0x42, 0x3d, 0x37, 0x2c, 0x3f, 0x37, 0x3c, 0x3d, 0x3a, 0x36, 0x33, 0x35, + 0x3c, 0x34, 0x3c, 0x39, 0x3c, 0x3a, 0x37, 0x30, 0x30, 0x3e, 0x3d, 0x3a, + 0x44, 0x37, 0x36, 0x32, 0x36, 0x37, 0x36, 0x3a, 0x3c, 0x41, 0x3a, 0x35, + 0x36, 0x3a, 0x34, 0x40, 0x39, 0x40, 0x3e, 0x32, 0x34, 0x46, 0x33, 0x3f, + 0x36, 0x45, 0x3e, 0x35, 0x3f, 0x38, 0x3f, 0x3e, 0x3b, 0x3a, 0x36, 0x3b, + 0x36, 
0x38, 0x32, 0x3f, 0x44, 0x3c, 0x35, 0x48, 0x38, 0x39, 0x31, 0x49, + 0x3d, 0x43, 0x36, 0x3f, 0x31, 0x43, 0x36, 0x3e, 0x3e, 0x41, 0x39, 0x3b, + 0x40, 0x42, 0x3c, 0x43, 0x36, 0x4a, 0x48, 0x67, 0x4e, 0x43, 0x36, 0x46, + 0x44, 0x3f, 0x4b, 0x4b, 0x3f, 0x38, 0x3c, 0x3c, 0x5e, 0x38, 0x70, 0x52, + 0x38, 0x32, 0x3b, 0x36, 0x4a, 0x2c, 0x52, 0x46, 0x29, 0x4f, 0x48, 0x42, + 0x2d, 0x2e, 0x4f, 0x28, 0x28, 0x45, 0x4d, 0x52, 0x42, 0x3e, 0x3f, 0x41, + 0x3c, 0x3a, 0x47, 0x50, 0x44, 0x45, 0x33, 0x4b, 0x3e, 0x3f, 0x42, 0x3d, + 0x43, 0x34, 0x27, 0x3f, 0x42, 0x3e, 0x43, 0x3e, 0x3a, 0x3c, 0x37, 0x3b, + 0x3f, 0x30, 0x3a, 0x3e, 0x3c, 0x34, 0x37, 0x24, 0x3d, 0x43, 0x40, 0x44, + 0x40, 0x46, 0x31, 0x2f, 0x43, 0x38, 0x38, 0x39, 0x3c, 0x34, 0x2d, 0x2a, + 0x38, 0x31, 0x43, 0x3b, 0x39, 0x3b, 0x32, 0x34, 0x3e, 0x39, 0x41, 0x3b, + 0x3e, 0x33, 0x3a, 0x2a, 0x41, 0x3f, 0x3c, 0x43, 0x3b, 0x3e, 0x35, 0x2c, + 0x38, 0x41, 0x33, 0x31, 0x3e, 0x3f, 0x3a, 0x3c, 0x3b, 0x35, 0x3f, 0x3d, + 0x42, 0x3a, 0x3c, 0x35, 0x3f, 0x40, 0x3c, 0x3e, 0x37, 0x41, 0x3d, 0x38, + 0x34, 0x31, 0x36, 0x3d, 0x3d, 0x47, 0x36, 0x44, 0x3f, 0x45, 0x3c, 0x3c, + 0x35, 0x36, 0x31, 0x4f, 0x46, 0x3a, 0x41, 0x42, 0x40, 0x32, 0x33, 0x41, + 0x34, 0x40, 0x3d, 0x43, 0x3b, 0x3a, 0x32, 0x3c, 0x42, 0x42, 0x3d, 0x43, + 0x37, 0x45, 0x45, 0xff, 0x4b, 0x45, 0x3b, 0x40, 0x43, 0x3e, 0x47, 0x49, + 0x3d, 0x3b, 0x3e, 0x33, 0x58, 0x35, 0x71, 0x54, 0x2f, 0x38, 0x38, 0x33, + 0x47, 0x35, 0x5b, 0x46, 0x2c, 0x4c, 0x43, 0x37, 0x36, 0x39, 0x4f, 0x30, + 0x26, 0x48, 0x51, 0x48, 0x46, 0x45, 0x3b, 0x39, 0x42, 0x50, 0x47, 0x4c, + 0x4b, 0x3b, 0x3d, 0x4d, 0x41, 0x34, 0x40, 0x44, 0x38, 0x32, 0x2d, 0x43, + 0x39, 0x36, 0x3b, 0x3b, 0x40, 0x3d, 0x37, 0x3c, 0x44, 0x39, 0x42, 0x37, + 0x38, 0x38, 0x32, 0x2f, 0x41, 0x40, 0x3f, 0x3a, 0x37, 0x35, 0x3b, 0x2a, + 0x37, 0x30, 0x3c, 0x37, 0x40, 0x38, 0x3a, 0x27, 0x44, 0x3d, 0x43, 0x40, + 0x35, 0x3f, 0x3e, 0x32, 0x3e, 0x3c, 0x40, 0x39, 0x39, 0x3a, 0x41, 0x31, + 0x3b, 0x3f, 0x34, 0x43, 0x3a, 0x38, 0x42, 0x2a, 0x47, 0x46, 0x3b, 0x38, + 0x47, 
0x45, 0x39, 0x31, 0x43, 0x40, 0x37, 0x3a, 0x3d, 0x3e, 0x39, 0x30, + 0x36, 0x37, 0x3a, 0x43, 0x3f, 0x32, 0x31, 0x41, 0x45, 0x3e, 0x43, 0x38, + 0x3f, 0x37, 0x3c, 0x49, 0x3b, 0x33, 0x3d, 0x3a, 0x37, 0x44, 0x32, 0x50, + 0x39, 0x44, 0x3e, 0x3f, 0x3d, 0x41, 0x3e, 0x3e, 0x42, 0x44, 0x45, 0x3f, + 0x36, 0x3f, 0x37, 0x39, 0x3b, 0x3d, 0x3b, 0x3b, 0x2f, 0x46, 0x40, 0x6d, + 0x50, 0x45, 0x3b, 0x45, 0x46, 0x3b, 0x42, 0x48, 0x42, 0x3c, 0x39, 0x37, + 0x57, 0x3b, 0x6c, 0x5b, 0x32, 0x35, 0x3d, 0x39, 0x48, 0x31, 0x5c, 0x46, + 0x29, 0x4c, 0x3f, 0x3e, 0x37, 0x33, 0x58, 0x32, 0x2a, 0x43, 0x4c, 0x50, + 0x3b, 0x44, 0x3c, 0x41, 0x39, 0x48, 0x55, 0x4c, 0x42, 0x38, 0x3b, 0x51, + 0x3f, 0x38, 0x44, 0x46, 0x36, 0x3b, 0x38, 0x4a, 0x3f, 0x37, 0x36, 0x3c, + 0x31, 0x3d, 0x32, 0x39, 0x3b, 0x3f, 0x3e, 0x35, 0x38, 0x3f, 0x34, 0x2b, + 0x37, 0x36, 0x39, 0x40, 0x37, 0x41, 0x32, 0x27, 0x36, 0x33, 0x40, 0x3a, + 0x3f, 0x44, 0x3f, 0x25, 0x38, 0x34, 0x42, 0x3c, 0x3a, 0x40, 0x38, 0x31, + 0x49, 0x3e, 0x33, 0x3d, 0x31, 0x36, 0x39, 0x2b, 0x44, 0x2f, 0x43, 0x34, + 0x34, 0x37, 0x39, 0x33, 0x3b, 0x34, 0x42, 0x3c, 0x40, 0x45, 0x36, 0x31, + 0x43, 0x47, 0x3e, 0x3f, 0x40, 0x3a, 0x33, 0x34, 0x41, 0x44, 0x3a, 0x43, + 0x3e, 0x38, 0x36, 0x31, 0x42, 0x44, 0x40, 0x41, 0x44, 0x43, 0x33, 0x42, + 0x3d, 0x41, 0x3d, 0x3e, 0x3c, 0x39, 0x3e, 0x4f, 0x3f, 0x37, 0x31, 0x40, + 0x3b, 0x38, 0x35, 0x3b, 0x44, 0x41, 0x41, 0x37, 0x40, 0x42, 0x2d, 0x3d, + 0x39, 0x48, 0x44, 0x3e, 0x34, 0x48, 0x49, 0x6d, 0x45, 0x4b, 0x3a, 0x44, + 0x49, 0x40, 0x4d, 0x51, 0x3f, 0x34, 0x3b, 0x40, 0x52, 0x34, 0x6f, 0x56, + 0x33, 0x3e, 0x40, 0x39, 0x41, 0x32, 0x5d, 0x45, 0x2e, 0x51, 0x48, 0x3c, + 0x2e, 0x2e, 0x51, 0x39, 0x32, 0x45, 0x4a, 0x4c, 0x3b, 0x40, 0x40, 0x3b, + 0x36, 0x41, 0x54, 0x4e, 0x4a, 0x49, 0x3b, 0x4d, 0x3c, 0x41, 0x38, 0x47, + 0x3d, 0x3c, 0x37, 0x48, 0x3f, 0x42, 0x3e, 0x36, 0x39, 0x46, 0x37, 0x3e, + 0x3b, 0x38, 0x40, 0x3b, 0x39, 0x32, 0x3e, 0x29, 0x37, 0x35, 0x3c, 0x3d, + 0x37, 0x3b, 0x35, 0x2f, 0x32, 0x3b, 0x37, 0x3c, 0x40, 0x3e, 0x39, 0x27, + 0x3b, 
0x38, 0x37, 0x36, 0x39, 0x37, 0x37, 0x35, 0x42, 0x3e, 0x3b, 0x43, + 0x41, 0x3c, 0x37, 0x2a, 0x3a, 0x3e, 0x38, 0x40, 0x36, 0x3e, 0x44, 0x2e, + 0x3e, 0x3a, 0x37, 0x3b, 0x3e, 0x41, 0x3d, 0x30, 0x3b, 0x3f, 0x41, 0x45, + 0x3a, 0x48, 0x37, 0x2f, 0x3a, 0x37, 0x34, 0x43, 0x42, 0x3d, 0x38, 0x41, + 0x3b, 0x3c, 0x39, 0x3c, 0x39, 0x47, 0x2e, 0x41, 0x42, 0x40, 0x32, 0x36, + 0x43, 0x40, 0x3d, 0x4c, 0x38, 0x3e, 0x3b, 0x41, 0x3d, 0x3b, 0x34, 0x43, + 0x43, 0x3f, 0x44, 0x3c, 0x3a, 0x33, 0x39, 0x42, 0x43, 0x3f, 0x33, 0x3d, + 0x33, 0x3e, 0x48, 0x6b, 0x48, 0x43, 0x36, 0x47, 0x49, 0x44, 0x4a, 0x49, + 0x3c, 0x31, 0x35, 0x3e, 0x5c, 0x34, 0x73, 0x53, 0x33, 0x3c, 0x32, 0x3b, + 0x43, 0x27, 0x59, 0x4e, 0x2b, 0x51, 0x4f, 0x37, 0x36, 0x34, 0x56, 0x34, + 0x32, 0x4f, 0x46, 0x50, 0x40, 0x40, 0x3c, 0x3e, 0x34, 0x37, 0x50, 0x49, + 0x43, 0x47, 0x3e, 0x52, 0x44, 0x38, 0x3b, 0x4f, 0x3a, 0x3d, 0x2b, 0x4c, + 0x40, 0x38, 0x3a, 0x35, 0x3a, 0x3a, 0x3d, 0x38, 0x3d, 0x3b, 0x37, 0x48, + 0x3d, 0x3d, 0x32, 0x30, 0x3a, 0x34, 0x3f, 0x3a, 0x3b, 0x3e, 0x35, 0x2f, + 0x3b, 0x3a, 0x45, 0x3d, 0x42, 0x33, 0x33, 0x24, 0x44, 0x39, 0x3c, 0x3d, + 0x41, 0x3c, 0x37, 0x2c, 0x3b, 0x36, 0x34, 0x41, 0x3d, 0x3f, 0x39, 0x32, + 0x3c, 0x40, 0x44, 0x3d, 0x41, 0x3d, 0x3a, 0x29, 0x3e, 0x3e, 0x43, 0x33, + 0x3f, 0x3e, 0x3e, 0x31, 0x38, 0x3a, 0x34, 0x3d, 0x3f, 0x3e, 0x3a, 0x3d, + 0x3e, 0x48, 0x45, 0x3d, 0x44, 0x37, 0x33, 0x3d, 0x45, 0x39, 0x40, 0x40, + 0x42, 0x3f, 0x3f, 0x3d, 0x3a, 0x3b, 0x41, 0x33, 0x41, 0x3c, 0x32, 0x55, + 0x43, 0x3a, 0x32, 0x40, 0x3c, 0x3e, 0x40, 0x43, 0x37, 0x3f, 0x40, 0x38, + 0x43, 0x41, 0x36, 0x42, 0x44, 0x3c, 0x32, 0x3f, 0x38, 0x42, 0x46, 0x59, + 0x4c, 0x41, 0x39, 0x47, 0x46, 0x46, 0x44, 0x44, 0x35, 0x42, 0x32, 0x39, + 0x4f, 0x34, 0x6d, 0x55, 0x31, 0x3b, 0x3a, 0x3f, 0x44, 0x2c, 0x5d, 0x43, + 0x26, 0x4a, 0x4f, 0x40, 0x36, 0x32, 0x4d, 0x33, 0x2f, 0x50, 0x4d, 0x57, + 0x3b, 0x40, 0x42, 0x44, 0x41, 0x3f, 0x52, 0x4e, 0x35, 0x41, 0x44, 0x52, + 0x40, 0x35, 0x39, 0x4b, 0x45, 0x34, 0x2c, 0x4a, 0x3b, 0x41, 0x31, 0x33, + 0x3f, 
0x3a, 0x36, 0x3c, 0x3c, 0x33, 0x30, 0x38, 0x43, 0x3f, 0x32, 0x2d, + 0x3f, 0x3a, 0x38, 0x41, 0x39, 0x45, 0x36, 0x2e, 0x3c, 0x38, 0x45, 0x3f, + 0x40, 0x3f, 0x3e, 0x26, 0x41, 0x37, 0x3c, 0x44, 0x3f, 0x3f, 0x35, 0x37, + 0x46, 0x34, 0x37, 0x3e, 0x48, 0x38, 0x36, 0x34, 0x33, 0x39, 0x40, 0x3c, + 0x42, 0x3d, 0x3b, 0x31, 0x38, 0x3b, 0x44, 0x42, 0x45, 0x38, 0x41, 0x30, + 0x3d, 0x42, 0x36, 0x3f, 0x3b, 0x45, 0x37, 0x32, 0x3c, 0x37, 0x3d, 0x42, + 0x38, 0x3d, 0x2f, 0x31, 0x39, 0x40, 0x3f, 0x44, 0x3a, 0x41, 0x44, 0x46, + 0x3d, 0x3a, 0x32, 0x3b, 0x34, 0x47, 0x36, 0x4c, 0x47, 0x35, 0x3c, 0x33, + 0x3b, 0x3c, 0x30, 0x43, 0x43, 0x3f, 0x31, 0x40, 0x3a, 0x37, 0x30, 0x46, + 0x39, 0x3b, 0x42, 0x40, 0x2d, 0x3f, 0x3e, 0x6a, 0x50, 0x3b, 0x31, 0x54, + 0x47, 0x3d, 0x48, 0x4e, 0x3b, 0x41, 0x3a, 0x39, 0x49, 0x36, 0x64, 0x4e, + 0x32, 0x39, 0x3d, 0x37, 0x42, 0x2c, 0x5c, 0x43, 0x2a, 0x4b, 0x4b, 0x46, + 0x30, 0x29, 0x52, 0x31, 0x35, 0x44, 0x4a, 0x4b, 0x3d, 0x3b, 0x4e, 0x42, + 0x3d, 0x39, 0x42, 0x52, 0x3f, 0x36, 0x3e, 0x50, 0x3f, 0x32, 0x35, 0x3a, + 0x40, 0x39, 0x35, 0x48, 0x3b, 0x3e, 0x41, 0x43, 0x43, 0x45, 0x2f, 0x36, + 0x38, 0x34, 0x3f, 0x44, 0x32, 0x3f, 0x37, 0x33, 0x33, 0x35, 0x2e, 0x41, + 0x37, 0x3e, 0x38, 0x28, 0x49, 0x30, 0x46, 0x39, 0x3b, 0x30, 0x38, 0x28, + 0x3b, 0x3d, 0x3a, 0x43, 0x3f, 0x34, 0x43, 0x36, 0x39, 0x3c, 0x3e, 0x3e, + 0x39, 0x3b, 0x39, 0x32, 0x3c, 0x36, 0x3e, 0x38, 0x34, 0x3c, 0x3a, 0x2a, + 0x46, 0x3d, 0x40, 0x37, 0x3b, 0x39, 0x3b, 0x34, 0x38, 0x31, 0x43, 0x46, + 0x3b, 0x43, 0x39, 0x2b, 0x38, 0x40, 0x3e, 0x39, 0x35, 0x3d, 0x2c, 0x36, + 0x37, 0x40, 0x36, 0x40, 0x41, 0x38, 0x32, 0x3f, 0x36, 0x46, 0x34, 0x31, + 0x40, 0x3e, 0x3c, 0x4e, 0x42, 0x3d, 0x36, 0x3f, 0x42, 0x3f, 0x33, 0x40, + 0x34, 0x37, 0x3c, 0x3b, 0x31, 0x47, 0x32, 0x3c, 0x34, 0x3d, 0x42, 0x3b, + 0x37, 0x41, 0x3b, 0x64, 0x52, 0x40, 0x36, 0x4e, 0x46, 0x3f, 0x3f, 0x47, + 0x3c, 0x3a, 0x3a, 0x41, 0x4a, 0x32, 0x5e, 0x50, 0x2d, 0x39, 0x3a, 0x38, + 0x3d, 0x2c, 0x5a, 0x3e, 0x2e, 0x47, 0x3e, 0x3e, 0x33, 0x29, 0x4c, 0x35, + 0x30, 
0x4d, 0x4d, 0x4d, 0x38, 0x42, 0x51, 0x47, 0x39, 0x3c, 0x43, 0x4b, + 0x42, 0x3f, 0x3a, 0x4b, 0x44, 0x3f, 0x3a, 0x44, 0x3e, 0x37, 0x30, 0x45, + 0x3d, 0x36, 0x34, 0x3f, 0x36, 0x35, 0x37, 0x36, 0x43, 0x3b, 0x37, 0x3e, + 0x35, 0x3e, 0x32, 0x34, 0x32, 0x38, 0x3c, 0x3a, 0x3a, 0x3c, 0x30, 0x2b, + 0x31, 0x37, 0x30, 0x42, 0x36, 0x37, 0x36, 0x2c, 0x3c, 0x31, 0x41, 0x37, + 0x44, 0x41, 0x3b, 0x37, 0x41, 0x3f, 0x38, 0x3b, 0x3a, 0x3a, 0x3c, 0x2f, + 0x47, 0x41, 0x3e, 0x33, 0x42, 0x3a, 0x32, 0x34, 0x44, 0x40, 0x43, 0x3d, + 0x34, 0x41, 0x38, 0x35, 0x35, 0x3b, 0x45, 0x38, 0x32, 0x37, 0x3c, 0x2e, + 0x39, 0x40, 0x30, 0x3e, 0x42, 0x35, 0x3d, 0x36, 0x3e, 0x3d, 0x39, 0x46, + 0x3f, 0x36, 0x37, 0x49, 0x41, 0x39, 0x3d, 0x3d, 0x33, 0x44, 0x42, 0x50, + 0x3d, 0x3c, 0x3e, 0x3f, 0x42, 0x42, 0x3b, 0x3d, 0x41, 0x31, 0x39, 0x3a, + 0x44, 0x34, 0x38, 0x47, 0x44, 0x38, 0x3b, 0x42, 0x30, 0x42, 0x44, 0x57, + 0x49, 0x3a, 0x39, 0x4f, 0x41, 0x3e, 0x40, 0x43, 0x37, 0x42, 0x3b, 0x48, + 0x50, 0x29, 0x5b, 0x44, 0x2c, 0x40, 0x3f, 0x3c, 0x46, 0x34, 0x5c, 0x41, + 0x2c, 0x48, 0x46, 0x46, 0x35, 0x32, 0x4c, 0x35, 0x2f, 0x3b, 0x48, 0x44, + 0x41, 0x41, 0x49, 0x45, 0x34, 0x37, 0x44, 0x45, 0x43, 0x3b, 0x42, 0x44, + 0x3a, 0x37, 0x48, 0x49, 0x34, 0x39, 0x33, 0x4a, 0x40, 0x3d, 0x33, 0x39, + 0x39, 0x3b, 0x30, 0x31, 0x3d, 0x47, 0x3c, 0x3a, 0x34, 0x3c, 0x3a, 0x2b, + 0x3a, 0x34, 0x41, 0x40, 0x42, 0x36, 0x44, 0x2c, 0x40, 0x47, 0x3b, 0x37, + 0x38, 0x42, 0x44, 0x29, 0x36, 0x3d, 0x3d, 0x36, 0x42, 0x3b, 0x35, 0x36, + 0x43, 0x39, 0x41, 0x3d, 0x45, 0x41, 0x31, 0x32, 0x40, 0x3d, 0x3c, 0x41, + 0x3e, 0x3d, 0x35, 0x34, 0x32, 0x38, 0x36, 0x3f, 0x3b, 0x3d, 0x39, 0x36, + 0x40, 0x3e, 0x3d, 0x3a, 0x3a, 0x3b, 0x3c, 0x32, 0x40, 0x34, 0x3a, 0x36, + 0x42, 0x47, 0x3e, 0x33, 0x3a, 0x44, 0x30, 0x39, 0x40, 0x3a, 0x36, 0x44, + 0x3c, 0x3b, 0x3f, 0x33, 0x3e, 0x3c, 0x35, 0x53, 0x43, 0x3c, 0x3f, 0x43, + 0x3d, 0x44, 0x33, 0x47, 0x42, 0x40, 0x37, 0x3b, 0x43, 0x3f, 0x33, 0x41, + 0x38, 0x42, 0x44, 0x3d, 0x2d, 0x3f, 0x46, 0x49, 0x4e, 0x3f, 0x36, 0x45, + 0x45, 
0x39, 0x40, 0x42, 0x39, 0x39, 0x3a, 0x42, 0x45, 0x2c, 0x61, 0x44, + 0x30, 0x45, 0x38, 0x3a, 0x40, 0x37, 0x58, 0x39, 0x31, 0x3e, 0x3a, 0x3e, + 0x37, 0x32, 0x4a, 0x39, 0x2e, 0x47, 0x3e, 0x4e, 0x3f, 0x3e, 0x48, 0x45, + 0x3f, 0x48, 0x3a, 0x3f, 0x40, 0x36, 0x3a, 0x44, 0x36, 0x3e, 0x3d, 0x41, + 0x45, 0x36, 0x36, 0x4b, 0x3a, 0x3d, 0x45, 0x48, 0x38, 0x45, 0x39, 0x38, + 0x38, 0x3a, 0x42, 0x34, 0x3f, 0x34, 0x39, 0x34, 0x32, 0x3f, 0x3c, 0x3d, + 0x3d, 0x47, 0x3a, 0x2f, 0x3c, 0x3e, 0x3f, 0x39, 0x35, 0x42, 0x3c, 0x2a, + 0x3b, 0x35, 0x42, 0x44, 0x46, 0x39, 0x38, 0x39, 0x43, 0x3a, 0x38, 0x42, + 0x3d, 0x3a, 0x40, 0x35, 0x34, 0x39, 0x3a, 0x38, 0x43, 0x42, 0x42, 0x2d, + 0x31, 0x3b, 0x33, 0x40, 0x3b, 0x47, 0x35, 0x30, 0x3a, 0x3c, 0x3b, 0x47, + 0x3a, 0x3c, 0x38, 0x35, 0x3c, 0x35, 0x3e, 0x3e, 0x39, 0x3d, 0x39, 0x40, + 0x37, 0x33, 0x49, 0x38, 0x3c, 0x43, 0x34, 0x40, 0x39, 0x42, 0x3c, 0x3b, + 0x3e, 0x45, 0x3e, 0x51, 0x3d, 0x3f, 0x3b, 0x34, 0x37, 0x3c, 0x40, 0x47, + 0x3c, 0x41, 0x3f, 0x41, 0x37, 0x3e, 0x36, 0x3c, 0x42, 0x40, 0x3f, 0x3a, + 0x3b, 0x42, 0x44, 0x4b, 0x4b, 0x37, 0x41, 0x4d, 0x41, 0x45, 0x40, 0x41, + 0x40, 0x38, 0x37, 0x40, 0x42, 0x2c, 0x57, 0x43, 0x2d, 0x49, 0x3a, 0x3e, + 0x37, 0x2f, 0x52, 0x37, 0x31, 0x42, 0x3b, 0x3f, 0x39, 0x38, 0x48, 0x3c, + 0x37, 0x3d, 0x3a, 0x39, 0x3a, 0x45, 0x4b, 0x49, 0x3e, 0x44, 0x48, 0x49, + 0x3d, 0x39, 0x3c, 0x41, 0x41, 0x38, 0x45, 0x38, 0x33, 0x3d, 0x37, 0x47, + 0x34, 0x3f, 0x3b, 0x3d, 0x39, 0x34, 0x30, 0x39, 0x44, 0x36, 0x34, 0x3c, + 0x37, 0x38, 0x45, 0x34, 0x40, 0x33, 0x41, 0x3a, 0x3e, 0x3c, 0x3b, 0x3a, + 0x40, 0x3f, 0x3b, 0x3d, 0x3b, 0x46, 0x41, 0x2a, 0x3a, 0x3c, 0x42, 0x46, + 0x33, 0x3f, 0x2d, 0x3a, 0x45, 0x45, 0x38, 0x3b, 0x44, 0x34, 0x35, 0x3f, + 0x34, 0x43, 0x38, 0x3e, 0x41, 0x3b, 0x42, 0x38, 0x3d, 0x3f, 0x38, 0x45, + 0x3b, 0x35, 0x39, 0x3c, 0x43, 0x43, 0x38, 0x34, 0x44, 0x43, 0x2e, 0x39, + 0x39, 0x40, 0x39, 0x41, 0x41, 0x34, 0x3e, 0x44, 0x3d, 0x43, 0x3a, 0x3a, + 0x3b, 0x3b, 0x36, 0x45, 0x3c, 0x43, 0x3d, 0x48, 0x36, 0x36, 0x39, 0x55, + 0x35, 
0x40, 0x3e, 0x49, 0x40, 0x3a, 0x3d, 0x3d, 0x34, 0x47, 0x40, 0x41, + 0x40, 0x47, 0x39, 0x3e, 0x3b, 0x38, 0x3c, 0x3a, 0x35, 0x3e, 0x41, 0x4a, + 0x4b, 0x3f, 0x36, 0x3d, 0x40, 0x3c, 0x39, 0x32, 0x33, 0x36, 0x30, 0x42, + 0x42, 0x36, 0x54, 0x48, 0x2e, 0x4c, 0x34, 0x3c, 0x39, 0x36, 0x4e, 0x37, + 0x2f, 0x3e, 0x30, 0x3d, 0x36, 0x3b, 0x45, 0x36, 0x37, 0x3e, 0x41, 0x4b, + 0x3b, 0x36, 0x45, 0x3b, 0x38, 0x45, 0x3e, 0x43, 0x48, 0x46, 0x44, 0x44, + 0x3e, 0x3b, 0x37, 0x3b, 0x3a, 0x3f, 0x3d, 0x44, 0x39, 0x38, 0x45, 0x43, + 0x3d, 0x35, 0x39, 0x2c, 0x44, 0x41, 0x36, 0x40, 0x3d, 0x39, 0x3d, 0x2f, + 0x3d, 0x39, 0x42, 0x3d, 0x36, 0x46, 0x43, 0x2c, 0x41, 0x3a, 0x30, 0x45, + 0x3f, 0x41, 0x35, 0x2b, 0x3b, 0x38, 0x3a, 0x44, 0x32, 0x32, 0x39, 0x3c, + 0x3a, 0x3a, 0x3c, 0x3a, 0x35, 0x40, 0x3b, 0x31, 0x36, 0x33, 0x35, 0x34, + 0x3c, 0x3b, 0x3d, 0x36, 0x48, 0x3b, 0x3f, 0x42, 0x3e, 0x33, 0x2f, 0x3a, + 0x49, 0x41, 0x39, 0x3e, 0x3c, 0x44, 0x3c, 0x39, 0x33, 0x39, 0x36, 0x35, + 0x3d, 0x42, 0x34, 0x3e, 0x38, 0x45, 0x40, 0x45, 0x3d, 0x48, 0x42, 0x4a, + 0x3f, 0x45, 0x38, 0x42, 0x44, 0x40, 0x34, 0x49, 0x44, 0x3d, 0x3a, 0x39, + 0x3e, 0x3a, 0x42, 0x3e, 0x48, 0x42, 0x3e, 0x3a, 0x3f, 0x3f, 0x32, 0x3b, + 0x38, 0x41, 0x3c, 0x39, 0x33, 0x45, 0x44, 0x3c, 0x48, 0x41, 0x41, 0x3d, + 0x3a, 0x3c, 0x37, 0x33, 0x41, 0x3f, 0x38, 0x3a, 0x3f, 0x37, 0x51, 0x3c, + 0x37, 0x3a, 0x43, 0x37, 0x40, 0x31, 0x4f, 0x34, 0x3b, 0x44, 0x45, 0x39, + 0x40, 0x33, 0x49, 0x33, 0x3e, 0x35, 0x44, 0x3d, 0x3b, 0x3f, 0x43, 0x41, + 0x43, 0x43, 0x48, 0x44, 0x46, 0x3b, 0x43, 0x3f, 0x3c, 0x3f, 0x3e, 0x3d, + 0x3b, 0x41, 0x3c, 0x43, 0x30, 0x34, 0x39, 0x33, 0x3f, 0x38, 0x36, 0x2e, + 0x33, 0x3f, 0x3c, 0x40, 0x3d, 0x3b, 0x3b, 0x31, 0x36, 0x41, 0x3b, 0x38, + 0x46, 0x36, 0x34, 0x31, 0x42, 0x44, 0x33, 0x35, 0x3f, 0x36, 0x3c, 0x30, + 0x3f, 0x31, 0x39, 0x3e, 0x3f, 0x47, 0x3e, 0x34, 0x36, 0x36, 0x34, 0x39, + 0x37, 0x46, 0x40, 0x33, 0x3b, 0x3a, 0x3f, 0x41, 0x37, 0x44, 0x3a, 0x3f, + 0x34, 0x45, 0x37, 0x33, 0x3f, 0x47, 0x41, 0x36, 0x39, 0x3e, 0x40, 0x38, + 0x41, 
0x3d, 0x3d, 0x36, 0x40, 0x3a, 0x3b, 0x3b, 0x41, 0x3b, 0x3a, 0x3f, + 0x3f, 0x3b, 0x35, 0x42, 0x46, 0x3a, 0x30, 0x45, 0x40, 0x37, 0x39, 0x39, + 0x3d, 0x38, 0x3f, 0x45, 0x3f, 0x31, 0x32, 0x3b, 0x35, 0x3e, 0x3b, 0x38, + 0x3b, 0x44, 0x37, 0x39, 0x37, 0x42, 0x3f, 0x44, 0x38, 0x36, 0x37, 0x44, + 0x45, 0x46, 0x41, 0x3b, 0x46, 0x42, 0x43, 0x43, 0x3a, 0x4b, 0x37, 0x35, + 0x3b, 0x40, 0x32, 0x38, 0x41, 0x38, 0x4f, 0x3e, 0x36, 0x3f, 0x47, 0x3b, + 0x47, 0x3b, 0x4a, 0x2e, 0x3d, 0x45, 0x3b, 0x46, 0x3e, 0x38, 0x43, 0x38, + 0x41, 0x48, 0x3a, 0x39, 0x40, 0x45, 0x3b, 0x43, 0x40, 0x3e, 0x43, 0x41, + 0x41, 0x3e, 0x39, 0x3f, 0x35, 0x42, 0x33, 0x3f, 0x3d, 0x32, 0x45, 0x3c, + 0x41, 0x31, 0x45, 0x38, 0x43, 0x45, 0x41, 0x35, 0x35, 0x40, 0x44, 0x36, + 0x3a, 0x3b, 0x3c, 0x2c, 0x3e, 0x41, 0x33, 0x3d, 0x46, 0x34, 0x3b, 0x30, + 0x30, 0x42, 0x43, 0x3d, 0x3d, 0x3d, 0x43, 0x31, 0x3f, 0x40, 0x3a, 0x3f, + 0x48, 0x3e, 0x3b, 0x39, 0x44, 0x43, 0x3b, 0x3a, 0x42, 0x38, 0x38, 0x3b, + 0x3f, 0x44, 0x37, 0x3e, 0x45, 0x40, 0x41, 0x3b, 0x3c, 0x3a, 0x38, 0x37, + 0x3b, 0x33, 0x3f, 0x35, 0x43, 0x3d, 0x33, 0x41, 0x3b, 0x46, 0x39, 0x32, + 0x39, 0x3f, 0x3b, 0x39, 0x47, 0x3c, 0x3f, 0x39, 0x34, 0x3d, 0x3c, 0x46, + 0x3f, 0x3e, 0x3e, 0x44, 0x34, 0x40, 0x3f, 0x39, 0x3c, 0x38, 0x36, 0x45, + 0x42, 0x46, 0x3b, 0x44, 0x3a, 0x3d, 0x3b, 0x42, 0x3b, 0x3b, 0x3c, 0x45, + 0x42, 0x3d, 0x36, 0x37, 0x3d, 0x43, 0x3f, 0x48, 0xa6, 0xfb, 0xff, 0xff, + 0x04, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xb3, 0x00, 0x00, 0x00, + 0x39, 0xff, 0xff, 0xff, 0xe5, 0xff, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, + 0x68, 0xfb, 0xff, 0xff, 0xbc, 0xfc, 0xff, 0xff, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xe8, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, 0x00, - 0x58, 0x01, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xd8, 0x00, 0x00, 0x00, - 0x38, 0x02, 0x00, 0x00, 0x9c, 0x02, 0x00, 0x00, 0xa0, 0x01, 0x00, 0x00, - 0x14, 0x03, 
0x00, 0x00, 0xfe, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, - 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x70, 0x02, 0x00, 0x00, + 0x70, 0x03, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0xf0, 0x01, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x50, 0x01, 0x00, 0x00, + 0xa4, 0x02, 0x00, 0x00, 0xba, 0xfc, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, + 0x24, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65, + 0x6c, 0x73, 0x5f, 0x73, 0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x3c, 0xfd, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x3b, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0xfd, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x03, 0x1c, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x61, 0x64, 0x64, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xa4, 0xfd, 0xff, 0xff, + 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x97, 0xf5, 0x3f, + 0x01, 0x00, 0x00, 0x00, 0x87, 0x35, 0xa0, 0x43, 0x01, 0x00, 0x00, 0x00, + 0xd6, 0xd7, 0x28, 0xc3, 0x92, 0xfd, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, + 0x1c, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x52, 0x65, 0x6c, 0x75, + 0x00, 0x00, 
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x52, 0x65, 0x6c, 0x75, 0x00, 0x00, 0x00, 0x00, - 0xcc, 0xfc, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x14, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x17, 0xbf, 0xd2, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x58, 0xec, 0xd1, 0x43, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0xfd, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x08, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x43, 0x6f, 0x6e, 0x76, - 0x32, 0x44, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x34, 0xff, 0xff, 0xff, + 0x05, 0x80, 0xbf, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x85, 0xc0, 0xbe, 0x43, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xfe, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x03, 0x3c, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e, + 0x74, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x57, + 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, 0x61, 0x72, + 0x73, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0a, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0xa4, 0xfe, 0xff, 0xff, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0xf5, 0xf7, 0x84, 0x3a, 0xc2, 0xfd, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 
0x00, 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x52, 0x65, 0x73, 0x68, - 0x61, 0x70, 0x65, 0x5f, 0x31, 0x00, 0x00, 0x00, 0x94, 0xfd, 0xff, 0xff, - 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xae, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x17, 0xac, 0x6e, 0x3a, 0x01, 0x00, 0x00, 0x00, + 0x20, 0x4e, 0x97, 0x3d, 0x01, 0x00, 0x00, 0x00, 0xaf, 0x27, 0x21, 0xbe, + 0x96, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x20, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x52, 0x65, 0x73, 0x68, 0x61, 0x70, 0x65, 0x5f, + 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x31, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x1c, 0xff, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x80, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x43, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a, 0xfe, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x10, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x74, 0x4d, - 0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x0c, 0x00, 0x0c, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x3f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x42, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x02, 0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x08, 0x00, 0x00, 
0x00, 0x1c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x43, 0x6f, 0x6e, 0x76, 0x32, 0x44, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xfc, 0xfe, 0xff, 0xff, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0xc5, 0x01, 0x2a, 0x3b, 0x96, 0xfe, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, - 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x44, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x0a, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, - 0x25, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, - 0x71, 0x75, 0x61, 0x6e, 0x74, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, - 0x61, 0x6e, 0x74, 0x57, 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, - 0x78, 0x56, 0x61, 0x72, 0x73, 0x00, 0x00, 0x00, 0x84, 0xfe, 0xff, 0xff, - 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xab, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xf5, 0xf7, 0x84, 0x3a, - 0x01, 0x00, 0x00, 0x00, 0x6e, 0x88, 0xae, 0x3d, 0x01, 0x00, 0x00, 0x00, - 0xd4, 0x97, 0x30, 0xbe, 0x26, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, - 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x61, 0x64, 0x64, 0x5f, - 0x31, 0x00, 0x00, 0x00, 0xec, 0xfe, 0xff, 0xff, 0x2c, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x2f, 0xad, 0x18, 0x40, 0x01, 0x00, 0x00, 0x00, - 0x02, 0x38, 0xa2, 0x43, 0x01, 0x00, 0x00, 0x00, 0x02, 0xf1, 0x8d, 0xc3, - 0x8e, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 
0x14, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x0e, 0x00, 0x00, 0x00, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x5f, 0x73, - 0x6f, 0x66, 0x74, 0x6d, 0x61, 0x78, 0x00, 0x00, 0x5c, 0xff, 0xff, 0xff, - 0x2c, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x17, 0xac, 0xee, 0x39, 0x5a, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x03, + 0x48, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x54, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x77, 0x65, 0x69, 0x67, + 0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e, 0x74, 0x5f, 0x31, 0x2f, + 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, 0x74, 0x57, 0x69, 0x74, + 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, 0x61, 0x72, 0x73, 0x2f, + 0x74, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73, 0x65, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x30, 0x11, 0x00, 0x00, + 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x3d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x9d, 0xaf, 0xd0, 0x3a, 0x01, 0x00, 0x00, 0x00, + 0xe7, 0x29, 0x9e, 0x3e, 0x01, 0x00, 0x00, 0x00, 0x5b, 0x91, 0xc3, 0xbd, + 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00, 0x07, 0x00, 0x0c, 0x00, + 0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x4d, 0x61, 0x74, 0x4d, + 0x75, 0x6c, 0x5f, 0x62, 0x69, 0x61, 0x73, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3b, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x3f, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x18, 0x00, 0x08, 0x00, - 0x07, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x14, 0x00, 0x0e, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x03, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x14, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x30, 0x11, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, - 0x77, 0x65, 0x69, 0x67, 0x68, 0x74, 0x73, 0x5f, 0x71, 0x75, 0x61, 0x6e, - 0x74, 0x5f, 0x31, 0x2f, 0x46, 0x61, 0x6b, 0x65, 0x51, 0x75, 0x61, 0x6e, - 0x74, 0x57, 0x69, 0x74, 0x68, 0x4d, 0x69, 0x6e, 0x4d, 0x61, 0x78, 0x56, - 0x61, 0x72, 0x73, 0x2f, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x70, 0x6f, 0x73, - 0x65, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x04, 0x00, 0x08, 0x00, - 0x0c, 0x00, 0x10, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, - 0x20, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x31, 0x83, 0xce, 0x3a, 0x01, 0x00, 0x00, 0x00, - 0x4d, 0x97, 0x92, 0x3e, 0x01, 0x00, 0x00, 0x00, 0x84, 0x75, 0xec, 0xbd, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x62, 0x1b, 0x1c, 0x3b, 0x03, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x09, - 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, - 0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x06, 0x00, 0x08, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x14, 0x00, 0x1c, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x07, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, - 0x01, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, - 0x28, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00, 0x14, 0x00, 0x18, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x07, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, - 0x10, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, - 0x1c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, - 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.cc b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.cc new file mode 
100644 index 0000000000000000000000000000000000000000..3ad29e53c83ddce9fcde7dae578de678d1dc75b8 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.cc @@ -0,0 +1,158 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h" + +/* File automatically created by + * tensorflow/examples/speech_commands/wav_to_features.py \ + * --sample_rate=16000 \ + * --clip_duration_ms=1000 \ + * --window_size_ms=30 \ + * --window_stride_ms=20 \ + * --feature_bin_count=40 \ + * --quantize \ + * --preprocess="average" \ + * --input_wav="speech_commands_test_set_v0.02/yes/f2e59fea_nohash_1.wav" \ + * --output_c_file="yes_features_data.cc" \ + */ + +const int g_yes_f2e59fea_nohash_1_width = 43; +const int g_yes_f2e59fea_nohash_1_height = 49; +const unsigned char g_yes_f2e59fea_nohash_1_data[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 19, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 3, 3, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 8, 89, 8, 0, 0, 0, 0, 0, 0, 0, 0, 4, 13, + 1, 6, 23, 20, 6, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 177, 42, 1, + 1, 0, 0, 0, 0, 2, 3, 119, 51, 5, 139, 92, 58, 58, 15, 2, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 13, 165, 176, 3, 1, 1, 0, 0, 1, 1, 32, 214, + 26, 19, 113, 103, 28, 22, 27, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 55, 128, + 27, 1, 1, 0, 1, 4, 2, 52, 93, 10, 28, 156, 10, 21, 21, 3, 3, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 14, 99, 32, 65, 7, 1, 2, 2, 6, 13, 121, + 36, 15, 11, 112, 125, 14, 5, 13, 4, 4, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 25, + 32, 5, 1, 0, 0, 0, 1, 0, 7, 5, 1, 1, 3, 3, 0, 3, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 13, 13, 5, 1, 0, 0, 0, 0, 0, 3, + 4, 1, 0, 1, 2, 3, 1, 1, 1, 4, 8, 1, 2, 1, 3, 1, 1, + 0, 1, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 8, 2, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 2, 0, 2, + 1, 0, 2, 0, 2, 2, 
3, 1, 1, 0, 1, 1, 4, 5, 1, 0, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 2, 1, 0, 1, 3, 1, + 1, 3, 1, 1, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 6, 2, 4, 2, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 1, 2, 1, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 2, 3, 5, 2, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 2, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +}; diff --git a/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h new file mode 100644 index 0000000000000000000000000000000000000000..33ac2308624235fc380782cd61e6a0247b81b093 --- /dev/null +++ b/tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.h @@ -0,0 +1,23 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_FEATURES_DATA_H_ +#define TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_FEATURES_DATA_H_ + +extern const int g_yes_f2e59fea_nohash_1_width; +extern const int g_yes_f2e59fea_nohash_1_height; +extern const unsigned char g_yes_f2e59fea_nohash_1_data[]; + +#endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_SPEECH_YES_FEATURES_DATA_H_ diff --git a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h index 104509c9dc6123e84c45f26d03465f608f100310..3b6554dea6a59feb2d2675ef58005e21a8001887 100644 --- a/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h +++ b/tensorflow/contrib/lite/experimental/micro/testing/micro_test.h @@ -135,4 +135,22 @@ extern tflite::ErrorReporter* reporter; } \ } while (false) +#define TF_LITE_MICRO_EXPECT_GT(x, y) \ + do { \ + if ((x) <= (y)) { \ + micro_test::reporter->Report(#x " > " #y " failed at %s:%d", __FILE__, \ + __LINE__); \ + micro_test::did_test_fail = true; \ + } \ + } while (false) + +#define TF_LITE_MICRO_EXPECT_LT(x, y) \ + do { \ + if ((x) >= (y)) { \ + micro_test::reporter->Report(#x " < " #y " failed at %s:%d", __FILE__, \ + __LINE__); \ + micro_test::did_test_fail = true; \ + } \ + } while (false) + #endif // TENSORFLOW_CONTRIB_LITE_EXPERIMENTAL_MICRO_TESTING_MICRO_TEST_H_ diff --git a/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile b/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile index 880bb4763cbbaf58db286ff142a822fbab60dfd8..3f749e53ef1aa995247f16cba059c369e27757c9 100644 --- a/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile +++ b/tensorflow/contrib/lite/experimental/micro/tools/make/Makefile @@ -55,7 +55,9 @@ MICROLITE_LIB_NAME := libtensorflow-microlite.a # Test binary for the microcontroller 
speech model. MICRO_SPEECH_TEST_SRCS := \ tensorflow/contrib/lite/experimental/micro/examples/micro_speech/micro_speech_test.cc \ -tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc +tensorflow/contrib/lite/experimental/micro/examples/micro_speech/tiny_conv_model_data.cc \ +tensorflow/contrib/lite/experimental/micro/examples/micro_speech/no_features_data.cc \ +tensorflow/contrib/lite/experimental/micro/examples/micro_speech/yes_features_data.cc MICROLITE_TEST_SRCS := \ $(wildcard tensorflow/contrib/lite/experimental/micro/*test.cc) \ diff --git a/tensorflow/contrib/lite/g3doc/_book.yaml b/tensorflow/contrib/lite/g3doc/_book.yaml index de6914e5366acef53a853a73f791dcfa801d444c..05c65441c3db4e74b6e7834437fa9cd0633af636 100644 --- a/tensorflow/contrib/lite/g3doc/_book.yaml +++ b/tensorflow/contrib/lite/g3doc/_book.yaml @@ -39,6 +39,16 @@ upper_tabs: - title: TensorFlow Lite for Raspberry Pi path: /lite/rpi + - heading: TF Lite converter + - title: Overview + path: /lite/convert/ + - title: Python API guide + path: /lite/convert/python_api + - title: Command line examples + path: /lite/convert/cmdline_examples + - title: Command line reference + path: /lite/convert/cmdline_reference + - title: TF Mobile style: accordion status: deprecated diff --git a/tensorflow/contrib/lite/g3doc/_index.yaml b/tensorflow/contrib/lite/g3doc/_index.yaml index bc66cc5dc1606537b7e186f3c825ab8335aa9e91..44ee6ba7505d421e46c8806ea5ca0ed4bc07f147 100644 --- a/tensorflow/contrib/lite/g3doc/_index.yaml +++ b/tensorflow/contrib/lite/g3doc/_index.yaml @@ -97,7 +97,7 @@ landing_page: path: https://www.shazam.com/ - custom_image: path: ./images/landing-page/nest_logo.png - path: https://nest.com/ + path: https://nest.com/ - custom_image: path: ./images/landing-page/loseit_logo.png path: https://www.loseit.com/ @@ -129,10 +129,10 @@ landing_page: icon_name: autorenew description: > Convert a TensorFlow model into a compressed flat buffer with the - TensorFlow 
Lite Optimizing Converter (TOCO). + TensorFlow Lite Converter. buttons: - - label: Read the TOCO guide - path: https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/toco/g3doc/python_api.md + - label: Read the converter guide + path: /lite/convert/ classname: button button-primary tfo-button-primary - heading: Deploy icon: diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md b/tensorflow/contrib/lite/g3doc/convert/cmdline_examples.md similarity index 76% rename from tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md rename to tensorflow/contrib/lite/g3doc/convert/cmdline_examples.md index aba7536cbd3fbec509390158896e078e6379c848..44fb4f19aeb12fa83f76b6373bcbc148561d0747 100644 --- a/tensorflow/contrib/lite/toco/g3doc/cmdline_examples.md +++ b/tensorflow/contrib/lite/g3doc/convert/cmdline_examples.md @@ -1,57 +1,33 @@ -# TensorFlow Lite Optimizing Converter command-line examples - -This page provides examples on how to use TOCO via command line. It is -complemented by the following documents: - -* [README](../README.md) -* [Command-line glossary](cmdline_reference.md) -* [Python API examples](python_api.md) - -Table of contents: - -* [Command-line tools](#tools) - * [Converting models prior to TensorFlow 1.9.](#pre-tensorflow-1.9) -* [Basic examples](#basic) - * [Convert a TensorFlow GraphDef](#graphdef) - * [Convert a TensorFlow SavedModel](#savedmodel) - * [Convert a tf.keras model](#keras) -* [Quantization](#quantization) - * [Convert a TensorFlow GraphDef for quantized inference](#graphdef-quant) - * [Use "dummy-quantization" to try out quantized inference on a float - graph](#dummy-quant) -* [Specifying input and output arrays](#specifying-input-and-output-arrays) - * [Multiple input arrays](#multiple-input-arrays) - * [Multiple output arrays](#multiple-output-arrays) - * [Specifying subgraphs](#specifying-subgraphs) -* [Graph visualizations](#graph-visualizations) - * [Using 
--output_format=GRAPHVIZ_DOT](#using-output-format-graphviz-dot) - * [Using --dump_graphviz_dir](#using-dump-graphviz-dir) - * [Graph "video" logging](#graph-video-logging) - * [Legend for the graph visualizations](#graphviz-legend) +# Converter command-line examples + +This page shows how to use the TensorFlow Lite Converter in the command line. + +[TOC] ## Command-line tools -There are two approaches to running TOCO via command line. +There are two approaches to running the converter in the command line. * `tflite_convert`: Starting from TensorFlow 1.9, the command-line tool - `tflite_convert` will be installed as part of the Python package. All of the + `tflite_convert` is installed as part of the Python package. All of the examples below use `tflite_convert` for simplicity. * Example: `tflite_convert --output_file=...` -* `bazel`: In order to run the latest version of TOCO, [clone the TensorFlow - repository](https://www.tensorflow.org/install/source) - and use `bazel`. This is the recommended approach for converting models that - utilize new features that were not supported by TOCO in TensorFlow 1.9. +* `bazel`: In order to run the latest version of the TensorFlow Lite Converter + either install the nightly build using + [pip](https://www.tensorflow.org/install/pip) or + [clone the TensorFlow repository](https://www.tensorflow.org/install/source) + and use `bazel`. * Example: `bazel run //tensorflow/contrib/lite/python:tflite_convert -- --output_file=...` -### Converting models prior to TensorFlow 1.9. +### Converting models prior to TensorFlow 1.9 -The recommended approach for using TOCO prior to TensorFlow 1.9 is the [Python -API](python_api.md#pre-tensorflow-1.9). If a command line tool is desired, the -`toco` command line tool was available in TensorFlow 1.7. Enter `toco --help` in -Terminal for additional details on the command-line flags available. There were -no command line tools in TensorFlow 1.8. 
+The recommended approach for using the converter prior to TensorFlow 1.9 is the +[Python API](python_api.md#pre_tensorflow_1.9). If a command line tool is +desired, the `toco` command line tool was available in TensorFlow 1.7. Enter +`toco --help` in Terminal for additional details on the command-line flags +available. There were no command line tools in TensorFlow 1.8. ## Basic examples @@ -115,11 +91,11 @@ tflite_convert \ ## Quantization -### Convert a TensorFlow GraphDef for quantized inference +### Convert a TensorFlow GraphDef for quantized inference -TOCO is compatible with fixed point quantization models described -[here](https://www.tensorflow.org/performance/quantization). These are float -models with +The TensorFlow Lite Converter is compatible with fixed point quantization models +described [here](https://www.tensorflow.org/performance/quantization). These are +float models with [`FakeQuant*`](https://www.tensorflow.org/api_guides/python/array_ops#Fake_quantization) ops inserted at the boundaries of fused layers to record min-max range information. This generates a quantized inference workload that reproduces the @@ -139,14 +115,14 @@ tflite_convert \ --std_dev_values=127 ``` -### Use \"dummy-quantization\" to try out quantized inference on a float graph +### Use \"dummy-quantization\" to try out quantized inference on a float graph -In order to evaluate the possible benefit of generating a quantized graph, TOCO -allows "dummy-quantization" on float graphs. The flags `--default_ranges_min` -and `--default_ranges_max` accept plausible values for the min-max ranges of the -values in all arrays that do not have min-max information. "Dummy-quantization" -will produce lower accuracy but will emulate the performance of a correctly -quantized model. +In order to evaluate the possible benefit of generating a quantized graph, the +converter allows "dummy-quantization" on float graphs. 
The flags +`--default_ranges_min` and `--default_ranges_max` accept plausible values for +the min-max ranges of the values in all arrays that do not have min-max +information. "Dummy-quantization" will produce lower accuracy but will emulate +the performance of a correctly quantized model. The example below contains a model using Relu6 activation functions. Therefore, a reasonable guess is that most activation ranges should be contained in [0, 6]. @@ -207,10 +183,10 @@ tflite_convert \ ### Specifying subgraphs Any array in the input file can be specified as an input or output array in -order to extract subgraphs out of an input graph file. TOCO discards the parts -of the graph outside of the specific subgraph. Use [graph -visualizations](#graph-visualizations) to identify the input and output arrays -that make up the desired subgraph. +order to extract subgraphs out of an input graph file. The TensorFlow Lite +Converter discards the parts of the graph outside of the specific subgraph. Use +[graph visualizations](#graph_visualizations) to identify the input and output +arrays that make up the desired subgraph. The follow command shows how to extract a single fused layer out of a TensorFlow GraphDef. @@ -247,11 +223,12 @@ function tends to get fused). ## Graph visualizations -TOCO can export a graph to the Graphviz Dot format for easy visualization via -either the `--output_format` flag or the `--dump_graphviz_dir` flag. The -subsections below outline the use cases for each. +The converter can export a graph to the Graphviz Dot format for easy +visualization using either the `--output_format` flag or the +`--dump_graphviz_dir` flag. The subsections below outline the use cases for +each. -### Using `--output_format=GRAPHVIZ_DOT` +### Using `--output_format=GRAPHVIZ_DOT` The first way to get a Graphviz rendering is to pass `GRAPHVIZ_DOT` into `--output_format`. This results in a plausible visualization of the graph. 
This @@ -323,10 +300,23 @@ As before, these can be rendered to PDFs: dot -Tpdf -O /tmp/toco_*.dot ``` -Sample output files can be seen here: - -* [toco_AT_IMPORT.dot.pdf](https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AT_IMPORT.dot.pdf) -* [toco_AFTER_TRANSFORMATIONS.dot.pdf](https://storage.googleapis.com/download.tensorflow.org/example_images/toco_AFTER_TRANSFORMATIONS.dot.pdf). +Sample output files can be seen here below. Note that it is the same +`AveragePool` node in the top right of each image. + + + + + + +
+ + + + + + + +
beforeafter
### Graph "video" logging @@ -336,7 +326,7 @@ each individual graph transformation, resulting in thousands of files. Typically, one would then bisect into these files to understand when a given change was introduced in the graph. -### Legend for the graph visualizations +### Legend for the graph visualizations * Operators are red square boxes with the following hues of red: * Most operators are @@ -345,7 +335,7 @@ change was introduced in the graph. * Some typically heavy operators (e.g. Conv) are rendered in a darker red. -* Arrays are octogons with the following colors: +* Arrays are octagons with the following colors: * Constant arrays are blue. * Activation arrays are gray: diff --git a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md b/tensorflow/contrib/lite/g3doc/convert/cmdline_reference.md similarity index 91% rename from tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md rename to tensorflow/contrib/lite/g3doc/convert/cmdline_reference.md index 00bc8d4ccb8aedcfe701377419e6cd41d0b59855..d72a46760d48dae46d63f1e914d8afda3f527e27 100644 --- a/tensorflow/contrib/lite/toco/g3doc/cmdline_reference.md +++ b/tensorflow/contrib/lite/g3doc/convert/cmdline_reference.md @@ -1,19 +1,10 @@ -# TensorFlow Lite Optimizing Converter command-line glossary +# Converter command-line reference -This page is complete reference of command-line flags used by TOCO's command -line starting from TensorFlow 1.9 up until the most recent build of TensorFlow. -It is complemented by the following other documents: +This page is a complete reference of command-line flags used by the TensorFlow +Lite Converter's command line starting from TensorFlow 1.9 up until the most +recent build of TensorFlow. 
-* [README](../README.md) -* [Command-line examples](cmdline_examples.md) -* [Python API examples](python_api.md) - -Table of contents: - -* [High-level flags](#high-level-flags) -* [Model flags](#model-flags) -* [Transformation flags](#transformation-flags) -* [Logging flags](#logging-flags) +[TOC] ## High-level flags @@ -32,7 +23,7 @@ files. The flag `--output_file` is always required. Additionally, either * `--output_format`. Type: string. Default: `TFLITE`. Specifies the format of the output file. Allowed values: * `TFLITE`: TensorFlow Lite FlatBuffer format. - * `GRAPHVIZ_DOT`: GraphViz `.dot` format containg a visualization of the + * `GRAPHVIZ_DOT`: GraphViz `.dot` format containing a visualization of the graph after graph transformations. * Note that passing `GRAPHVIZ_DOT` to `--output_format` leads to loss of TFLite specific transformations. Therefore, the resulting @@ -68,7 +59,7 @@ based on index. * `--input_shapes`. Type: colon-separated list of comma-separated lists of integers. Each comma-separated list of integers gives the shape of one of the input arrays specified in - [TensorFlow convention](https://www.tensorflow.org/versions/r1.2/programmers_guide/dims_types#shape). + [TensorFlow convention](https://www.tensorflow.org/guide/tensors#shape). * Example: `--input_shapes=1,60,80,3` for a typical vision model means a batch size of 1, an input image height of 60, an input image width of 80, and an input image depth of 3 (representing RGB channels). diff --git a/tensorflow/contrib/lite/g3doc/convert/index.md b/tensorflow/contrib/lite/g3doc/convert/index.md new file mode 100644 index 0000000000000000000000000000000000000000..bc92a1c1a11a6f3808e44f37d04704ece1627fc3 --- /dev/null +++ b/tensorflow/contrib/lite/g3doc/convert/index.md @@ -0,0 +1,19 @@ +# TensorFlow Lite Converter + +The TensorFlow Lite Converter takes a TensorFlow graph file and creates a graph +file used by the TensorFlow Lite interpreter. 
+ +## From model training to device deployment + +After a TensorFlow model is trained, the TensorFlow Lite converter uses that +model to generate a TensorFlow Lite [FlatBuffer](https://google.github.io/flatbuffers/) +file (`.tflite`). The converter supports as input: +[SavedModels](https://www.tensorflow.org/guide/saved_model#using_savedmodel_with_estimators), +frozen graphs (models generated by +[freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)), +and `tf.keras` models. The TensorFlow Lite `FlatBuffer` file is deployed to a +client device (generally a mobile or embedded device), and the TensorFlow Lite +interpreter uses the compressed model for on-device inference. This conversion +process is shown in the diagram below: + +![TFLite converter workflow](../images/convert/workflow.svg) diff --git a/tensorflow/contrib/lite/toco/g3doc/python_api.md b/tensorflow/contrib/lite/g3doc/convert/python_api.md similarity index 60% rename from tensorflow/contrib/lite/toco/g3doc/python_api.md rename to tensorflow/contrib/lite/g3doc/convert/python_api.md index 8c31c3dca865640ee1a60cbcc93b741f2d7d52cf..9dcb79187ec9bda487887327dbb575e8c580ba01 100644 --- a/tensorflow/contrib/lite/toco/g3doc/python_api.md +++ b/tensorflow/contrib/lite/g3doc/convert/python_api.md @@ -1,67 +1,43 @@ -# TensorFlow Lite Optimizing Converter & Interpreter Python API reference - -This page provides examples on how to use TOCO and the TensorFlow Lite -interpreter via the Python API. 
It is complemented by the following documents: - -* [README](../README.md) -* [Command-line examples](cmdline_examples.md) -* [Command-line glossary](cmdline_reference.md) - -Table of contents: - -* [High-level overview](#high-level-overview) -* [API](#api) -* [Basic examples](#basic) - * [Exporting a GraphDef from tf.Session](#basic-graphdef-sess) - * [Exporting a GraphDef from file](#basic-graphdef-file) - * [Exporting a SavedModel](#basic-savedmodel) - * [Exporting a tf.keras File](#basic-keras-file) -* [Complex examples](#complex) - * [Exporting a quantized GraphDef](#complex-quant) -* [TensorFlow Lite Python interpreter](#interpreter) - * [Using the interpreter from a model file](#interpreter-file) - * [Using the interpreter from model data](#interpreter-data) -* [Additional instructions](#additional-instructions) - * [Build from source code](#latest-package) - * [Converting models prior to TensorFlow 1.9.](#pre-tensorflow-1.9) +# Converter Python API guide + +This page provides examples on how to use the TensorFlow Lite Converter and the +TensorFlow Lite interpreter using the Python API. + +[TOC] + ## High-level overview -While the TensorFlow Lite Optimizing Converter can be used from the command -line, it is often convenient to use it as part of a Python model build and -training script. This is so that conversion can be part of your model -development pipeline. This allows you to know early and often that you are -designing a model that can be targeted to devices with mobile. +While the TensorFlow Lite Converter can be used from the command line, it is +often convenient to use in a Python script as part of the model development +pipeline. This allows you to know early that you are designing a model that can +be targeted to devices with mobile. ## API The API for converting TensorFlow models to TensorFlow Lite as of TensorFlow 1.9 -is `tf.contrib.lite.TocoConverter`. The API for calling the Python intepreter is -`tf.contrib.lite.Interpreter`. 
- -**NOTE**: As of TensorFlow 1.12, the API for converting TensorFlow models to -TFLite will be renamed to `TFLiteConverter`. `TFLiteConverter` is semantically -identically to `TocoConverter`. The API is available at -`tf.contrib.lite.TFLiteConverter` as of the Sept 26 `tf-nightly`. - -`TocoConverter` provides class methods based on the original format of the -model. `TocoConverter.from_session()` is available for GraphDefs. -`TocoConverter.from_saved_model()` is available for SavedModels. -`TocoConverter.from_keras_model_file()` is available for `tf.Keras` files. +is `tf.contrib.lite.TFLiteConverter`. The API for calling the Python interpreter +is `tf.contrib.lite.Interpreter`. + +Note: See the "Additional instructions" section for converting TensorFlow +models to TensorFlow Lite +[in TensorFlow 1.9 to TensorFlow 1.11](#pre_tensorflow_1.11) and +[prior to TensorFlow 1.9](#pre_tensorflow_1.9). + +`TFLiteConverter` provides class methods based on the original format of the +model. `TFLiteConverter.from_session()` is available for GraphDefs. +`TFLiteConverter.from_saved_model()` is available for SavedModels. +`TFLiteConverter.from_keras_model_file()` is available for `tf.Keras` files. Example usages for simple float-point models are shown in [Basic Examples](#basic). Examples usages for more complex models is shown in [Complex Examples](#complex). -**NOTE**: Currently, `TocoConverter` will cause a fatal error to the Python -interpreter when the conversion fails. This will be remedied as soon as -possible. - ## Basic examples The following section shows examples of how to convert a basic float-point model from each of the supported data formats into a TensorFlow Lite FlatBuffers. -### Exporting a GraphDef from tf.Session +### Exporting a GraphDef from tf.Session The following example shows how to convert a TensorFlow GraphDef into a TensorFlow Lite FlatBuffer from a `tf.Session` object. 
@@ -76,12 +52,12 @@ out = tf.identity(val, name="out") with tf.Session() as sess: sess.run(tf.global_variables_initializer()) - converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out]) + converter = tf.contrib.lite.TFLiteConverter.from_session(sess, [img], [out]) tflite_model = converter.convert() open("converted_model.tflite", "wb").write(tflite_model) ``` -### Exporting a GraphDef from file +### Exporting a GraphDef from file The following example shows how to convert a TensorFlow GraphDef into a TensorFlow Lite FlatBuffer when the GraphDef is stored in a file. Both `.pb` and @@ -89,7 +65,7 @@ TensorFlow Lite FlatBuffer when the GraphDef is stored in a file. Both `.pb` and The example uses [Mobilenet_1.0_224](https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_frozen.tgz). -The function only supports GraphDefs frozen via +The function only supports GraphDefs frozen using [freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py). ```python @@ -99,13 +75,13 @@ graph_def_file = "/path/to/Downloads/mobilenet_v1_1.0_224/frozen_graph.pb" input_arrays = ["input"] output_arrays = ["MobilenetV1/Predictions/Softmax"] -converter = tf.contrib.lite.TocoConverter.from_frozen_graph( +converter = tf.contrib.lite.TFLiteConverter.from_frozen_graph( graph_def_file, input_arrays, output_arrays) tflite_model = converter.convert() open("converted_model.tflite", "wb").write(tflite_model) ``` -### Exporting a SavedModel +### Exporting a SavedModel The following example shows how to convert a SavedModel into a TensorFlow Lite FlatBuffer. @@ -113,25 +89,26 @@ FlatBuffer. 
```python import tensorflow as tf -converter = tf.contrib.lite.TocoConverter.from_saved_model(saved_model_dir) +converter = tf.contrib.lite.TFLiteConverter.from_saved_model(saved_model_dir) tflite_model = converter.convert() open("converted_model.tflite", "wb").write(tflite_model) ``` For more complex SavedModels, the optional parameters that can be passed into -`TocoConverter.from_saved_model()` are `input_arrays`, `input_shapes`, +`TFLiteConverter.from_saved_model()` are `input_arrays`, `input_shapes`, `output_arrays`, `tag_set` and `signature_key`. Details of each parameter are -available by running `help(tf.contrib.lite.TocoConverter)`. +available by running `help(tf.contrib.lite.TFLiteConverter)`. -### Exporting a tf.keras File +### Exporting a tf.keras File The following example shows how to convert a `tf.keras` model into a TensorFlow -Lite FlatBuffer. +Lite FlatBuffer. This example requires +[`h5py`](http://docs.h5py.org/en/latest/build.html) to be installed. ```python import tensorflow as tf -converter = tf.contrib.lite.TocoConverter.from_keras_model_file("keras_model.h5") +converter = tf.contrib.lite.TFLiteConverter.from_keras_model_file("keras_model.h5") tflite_model = converter.convert() open("converted_model.tflite", "wb").write(tflite_model) ``` @@ -163,7 +140,7 @@ keras_file = "keras_model.h5" tf.keras.models.save_model(model, keras_file) # Convert to TensorFlow Lite model. -converter = tf.contrib.lite.TocoConverter.from_keras_model_file(keras_file) +converter = tf.contrib.lite.TFLiteConverter.from_keras_model_file(keras_file) tflite_model = converter.convert() open("converted_model.tflite", "wb").write(tflite_model) ``` @@ -173,13 +150,13 @@ open("converted_model.tflite", "wb").write(tflite_model) For models where the default value of the attributes is not sufficient, the attribute's values should be set before calling `convert()`. In order to call any constants use `tf.contrib.lite.constants.` as seen below with -`QUANTIZED_UINT8`. 
Run `help(tf.contrib.lite.TocoConverter)` in the Python +`QUANTIZED_UINT8`. Run `help(tf.contrib.lite.TFLiteConverter)` in the Python terminal for detailed documentation on the attributes. Although the examples are demonstrated on GraphDefs containing only constants. The same logic can be applied irrespective of the input data format. -### Exporting a quantized GraphDef +### Exporting a quantized GraphDef The following example shows how to convert a quantized model into a TensorFlow Lite FlatBuffer. @@ -193,7 +170,7 @@ val = img + const out = tf.fake_quant_with_min_max_args(val, min=0., max=1., name="output") with tf.Session() as sess: - converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out]) + converter = tf.contrib.lite.TFLiteConverter.from_session(sess, [img], [out]) converter.inference_type = tf.contrib.lite.constants.QUANTIZED_UINT8 input_arrays = converter.get_input_arrays() converter.quantized_input_stats = {input_arrays[0] : (0., 1.)} # mean, std_dev @@ -203,7 +180,7 @@ with tf.Session() as sess: ## TensorFlow Lite Python interpreter -### Using the interpreter from a model file +### Using the interpreter from a model file The following example shows how to use the TensorFlow Lite Python interpreter when provided a TensorFlow Lite FlatBuffer file. The example also demonstrates @@ -233,7 +210,7 @@ output_data = interpreter.get_tensor(output_details[0]['index']) print(output_data) ``` -### Using the interpreter from model data +### Using the interpreter from model data The following example shows how to use the TensorFlow Lite Python interpreter when starting with the TensorFlow Lite Flatbuffer model previously loaded. 
This @@ -250,7 +227,7 @@ val = img + const out = tf.identity(val, name="out") with tf.Session() as sess: - converter = tf.contrib.lite.TocoConverter.from_session(sess, [img], [out]) + converter = tf.contrib.lite.TFLiteConverter.from_session(sess, [img], [out]) tflite_model = converter.convert() # Load TFLite model and allocate tensors. @@ -260,15 +237,22 @@ interpreter.allocate_tensors() ## Additional instructions -### Build from source code +### Build from source code + +In order to run the latest version of the TensorFlow Lite Converter Python API, +either install the nightly build with +[pip](https://www.tensorflow.org/install/pip) (recommended) or +[Docker](https://www.tensorflow.org/install/docker), or +[build the pip package from source](https://www.tensorflow.org/install/source). + +### Converting models in TensorFlow 1.9 to TensorFlow 1.11 -In order to run the latest version of the TOCO Python API, clone the TensorFlow -repository, configure the installation, and build and install the pip package. -Detailed instructions are available -[here](https://www.tensorflow.org/install/source). +To convert TensorFlow models to TensorFlow Lite in TensorFlow 1.9 through +TensorFlow 1.11, use `TocoConverter`. `TocoConverter` is semantically +identical to `TFLiteConverter`. -### Converting models prior to TensorFlow 1.9. +### Converting models prior to TensorFlow 1.9 -To use TOCO in TensorFlow 1.7 and TensorFlow 1.8, use the `toco_convert` -function. Run `help(tf.contrib.lite.toco_convert)` to get details about accepted -parameters. +To convert TensorFlow models to TensorFlow Lite in TensorFlow 1.7 and TensorFlow +1.8, use the `toco_convert` function. Run `help(tf.contrib.lite.toco_convert)` +to get details about accepted parameters. 
diff --git a/tensorflow/contrib/lite/g3doc/images/convert/sample_after.png b/tensorflow/contrib/lite/g3doc/images/convert/sample_after.png new file mode 100644 index 0000000000000000000000000000000000000000..6c451f97903f7f70a9f28dee8abf6daeb7ec5693 Binary files /dev/null and b/tensorflow/contrib/lite/g3doc/images/convert/sample_after.png differ diff --git a/tensorflow/contrib/lite/g3doc/images/convert/sample_before.png b/tensorflow/contrib/lite/g3doc/images/convert/sample_before.png new file mode 100644 index 0000000000000000000000000000000000000000..e5317ef295062e79c66430512ef1c45925858ce0 Binary files /dev/null and b/tensorflow/contrib/lite/g3doc/images/convert/sample_before.png differ diff --git a/tensorflow/contrib/lite/g3doc/images/convert/workflow.svg b/tensorflow/contrib/lite/g3doc/images/convert/workflow.svg new file mode 100644 index 0000000000000000000000000000000000000000..3dfcbd67d8919bd1ffe2a09d7b291a7c3182fccd --- /dev/null +++ b/tensorflow/contrib/lite/g3doc/images/convert/workflow.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png b/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png new file mode 100644 index 0000000000000000000000000000000000000000..44d0ccd3128dea1c947e57ccbc4e18b2d34cef88 Binary files /dev/null and b/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_accuracy.png differ diff --git a/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png b/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png new file mode 100644 index 0000000000000000000000000000000000000000..94a6310612828db2370d19a094795341478e90f8 Binary files /dev/null and b/tensorflow/contrib/lite/g3doc/images/performance/model_size_vs_latency.png differ diff --git a/tensorflow/contrib/lite/g3doc/performance.md b/tensorflow/contrib/lite/g3doc/performance.md index 
6b7943caf8fe4ac5d7a97361c35138898f9b5661..ed114527166da79dba2d92c3ffad78e9885f9e94 100644 --- a/tensorflow/contrib/lite/g3doc/performance.md +++ b/tensorflow/contrib/lite/g3doc/performance.md @@ -3,8 +3,15 @@ Mobile and embedded devices have limited computational resources and it is important to keep your application resource efficient. We have compiled a list of best practices and strategies you can use to optimize your model and application when using Tensorflow Lite. -## Choose the most efficient model for the problem -Some models may be too large to run on embedded devices. Instead of large models it is better to use a slightly less precise but smaller model for embedded devices. Smaller models not only use less disk space and memory but are generally faster and more energy efficient. One example of models optimized for mobile devices are [MobileNets](https://arxiv.org/abs/1704.04861), which are optimized for mobile vision applications. Tensorflow Lite [models page](models.md) lists several other models that have been optimized specifically for mobile and embedded devices. +## Choose the best model for the task +Depending on the task you will need to make a tradeoff between model complexity and size. If your task requires high accuracy then you may need a large and complex model. Some tasks may work with a less precise model, for these tasks it is better to use a smaller but less precise model. Smaller models not only use less disk space and memory but are generally faster and more energy efficient. For example, graphs below show accuracy and latency tradeoff for some common image classification models. + +![accuracy vs model size](images/performance/model_size_vs_accuracy.png "Accuracy vs Model size") + + +![latency vs model size](images/performance/model_size_vs_latency.png "Latency vs Model size") + +One example of models optimized for mobile devices are [MobileNets](https://arxiv.org/abs/1704.04861), which are optimized for mobile vision applications. 
Tensorflow Lite [models page](models.md) lists several other models that have been optimized specifically for mobile and embedded devices. You can retrain the listed models on your own dataset by using transfer learning. Check out our transfer learning tutorial for [image classification](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0) and @@ -12,25 +19,25 @@ You can retrain the listed models on your own dataset by using transfer learning ## Profile your model -Before starting any optimization, it is a good practice to profile and benchmark your model. Tensorflow Lite [benchmarking tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark) has a built-in profiler that shows per operator profiling statistics. This can help in understanding performance bottlenecks and which operators dominate the computation time. +Once you have selected a candidate model that is right for your task, it is a good practice to profile and benchmark your model. Tensorflow Lite [benchmarking tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/tools/benchmark) has a built-in profiler that shows per operator profiling statistics. This can help in understanding performance bottlenecks and which operators dominate the computation time. ## Profile and optimize operators in the graph If a particular operator appears frequently in the model and based on profiling you find the operator consuming the most amount of time, you can look into optimizing the operator. This scenario should be rare as Tensorflow Lite has optimized versions for most ops. However you may be able to write a faster version of a custom op, if you know the constraints in which the operator is executed. Check out our [custom operator documentation](custom_operators.md). 
## Quantize your model -If your model uses floating point weights or activations then it may be possible to reduce the size of model up to ~4x by using quantization and other model optimizations. Check out our [model optimization toolkit](https://www.tensorflow.org/performance/model_optimization) for details about optimizing your model. Fully quantized models can be remarkably power efficient as well. +If your model uses floating point weights or activations then it may be possible to reduce the size of model up to ~4x by using quantization and other model optimizations. Check out our [model optimization toolkit](https://www.tensorflow.org/performance/model_optimization) for details about optimizing your model. ## Tweak the number of threads -Tensorflow Lite supports multi-threaded kernels for many operators. You can increase the number of threads and speed up execution of operators. Increasing the number of threads will however make your model use more resources and power. For some applications latency may be more important than energy efficiency. You can increase the number of threads by setting the number of [interpreter](https://github.com/tensorflow/tensorflow/blob/1084594657a5d139102ac794f84d1427a710e39a/tensorflow/contrib/lite/interpreter.h#L337) threads. +Tensorflow Lite supports multi-threaded kernels for many operators. You can increase the number of threads and speed up execution of operators. Increasing the number of threads will however make your model use more resources and power. For some applications latency may be more important than energy efficiency. You can increase the number of threads by setting the number of [interpreter](https://github.com/tensorflow/tensorflow/blob/1084594657a5d139102ac794f84d1427a710e39a/tensorflow/contrib/lite/interpreter.h#L337) threads. Multi-threaded execution however comes at the cost of increased performance variability depending on what else is being executed concurrently. 
This is particularly the case for mobile apps. For example, isolated tests may show 2x speed up vs single-threaded but if another app is executing at the same time, it may result in worse performance than single-threaded. ## Eliminate redundant copies -Tensorflow Lite is optimized to reduce redundant copies. The APIs allow user to [mmap a model file](https://github.com/tensorflow/tensorflow/blob/9982fd6c8831cbd2f58954f79ea71f26660393bc/tensorflow/contrib/lite/model.h#L152) and avoid copies. If your application is not careful, there can be redundant copies when feeding the input to the model and reading output from the model. Make sure to eliminate redundant copies. If you are using higher level APIs like Java API, make sure to carefully check the documentation for performance caveats. For example, the Java API is a lot faster if ByteBuffers are used as [inputs](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java#L151). +If your application is not careful, there can be redundant copies when feeding the input to the model and reading output from the model. Make sure to eliminate redundant copies. If you are using higher level APIs like Java API, make sure to carefully check the documentation for performance caveats. For example, the Java API is a lot faster if ByteBuffers are used as [inputs](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java#L151). ## Profile your application with platform specific tools Platform specific tools like [Android profiler](https://developer.android.com/studio/profile/android-profiler) and [Instruments](https://help.apple.com/instruments/mac/current/) provide a wealth of profiling information that can be used to debug your app. 
Sometimes the performance bug may be not in the model but in parts of application code that interact with the model. Make sure to familiarize yourself with platform specific profiling tools and best practices for your platform. -## Use hardware accelerators available on the device +## Evaluate whether your model benefits from using hardware accelerators available on the device Tensorflow Lite is working on adding support for accelerators like GPU and provides acceleration through [Neural Networks API](https://developer.android.com/ndk/guides/neuralnetworks/) on Android. You can utilize these hardware accelerator backends to improve the speed and efficiency of your model. To enable Neural Networks API call [UseNNAPI](https://github.com/tensorflow/tensorflow/blob/6305a6d83552ba6a472cd72398b60d9241467f1f/tensorflow/contrib/lite/interpreter.h#L334) on the interpreter instance. diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md b/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md index b0f32a8d6ca91229489c73c2c6f52d9c82d37b37..2eb776d10cf8ec68987d13b580eddf2f1bda8e78 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/android_build.md @@ -1,6 +1,22 @@ - # Building TensorFlow on Android +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ To get you started working with TensorFlow on Android, we'll walk through two ways to build our TensorFlow mobile demos and deploying them on an Android device. The first is Android Studio, which lets you build and deploy in an diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/index.md b/tensorflow/contrib/lite/g3doc/tfmobile/index.md index 49ad35d4e6a18f266d88e330626bae8bf1fc499f..15f0fd396134e40e89266182cb308080d9d250cb 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/index.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/index.md @@ -1,6 +1,22 @@ - # Overview +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ TensorFlow was designed to be a good deep learning solution for mobile platforms. Currently we have two solutions for deploying machine learning applications on mobile and embedded devices: TensorFlow for Mobile and diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md b/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md index be8b4100c89f4b02e651b1585faf438881c9119d..d922907cdc5fe5ccec8864b456586fce0293a0af 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/ios_build.md @@ -1,6 +1,22 @@ - # Building TensorFlow on iOS +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ ## Using CocoaPods The simplest way to get started with TensorFlow on iOS is using the CocoaPods diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md b/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md index 4d4bb3bc081d613714271f8b0bf7461cb1e0f4d5..fd0e322c93493ed835ae7ec9766a708885c6ac88 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/linking_libs.md @@ -1,6 +1,22 @@ - # Integrating TensorFlow libraries +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ Once you have made some progress on a model that addresses the problem you’re trying to solve, it’s important to test it out inside your application immediately. There are often unexpected differences between your training data diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md b/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md index 7436594fd8580151ba66562eccd408cc7e6c4201..59ff8e774c6c63a01668aee7d6caeea01171468d 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/optimizing.md @@ -1,6 +1,22 @@ - # Optimizing for mobile +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ There are some special issues that you have to deal with when you’re trying to ship on mobile or embedded devices, and you’ll need to think about these as you’re developing your model. diff --git a/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md b/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md index d1c67d4c61608bcbc9b0bcee5b60f46a73b44692..1d373251ddf3ba6a0119bd57bf14caf100ef371a 100644 --- a/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md +++ b/tensorflow/contrib/lite/g3doc/tfmobile/prepare_models.md @@ -1,6 +1,22 @@ - # Preparing models for mobile deployment +Warning: We expect to deprecate TensorFlow Mobile in early 2019 + +
+

+ TensorFlow Lite is our main mobile and embedded offering. We are + working hard to close the feature gap between TensorFlow Mobile and + TensorFlow Lite. We expect to deprecate TensorFlow Mobile in early 2019. We + will give ample notice to our users when we get to that point and will + provide help and support to ensure easy migrations. +

+

+ In the meantime, please use TensorFlow Lite. If you have a feature request, + such as a missing op, please post to our GitHub. +

+
+ The requirements for storing model information during training are very different from when you want to release it as part of a mobile app. This section covers the tools involved in converting from a training model to something diff --git a/tensorflow/contrib/lite/java/BUILD b/tensorflow/contrib/lite/java/BUILD index 098ba7e7731d833678fbd5eab9cce3f022570f23..cab8d5277f2d3f539e7a69f15ebda20821b19a3b 100644 --- a/tensorflow/contrib/lite/java/BUILD +++ b/tensorflow/contrib/lite/java/BUILD @@ -11,6 +11,10 @@ load("//tensorflow/java:build_defs.bzl", "JAVACOPTS") load("//tensorflow/contrib/lite:build_def.bzl", "tflite_jni_binary") load("//tensorflow/contrib/lite/java:aar_with_jni.bzl", "aar_with_jni") +JAVA_SRCS = glob([ + "src/main/java/org/tensorflow/lite/*.java", +]) + # Building tensorflow-lite.aar including 4 variants of .so # To build an aar for release, run below command: # bazel build --cxxopt='--std=c++11' -c opt --fat_apk_cpu=x86,x86_64,arm64-v8a,armeabi-v7a \ @@ -20,28 +24,38 @@ aar_with_jni( android_library = ":tensorflowlite", ) +# EXPERIMENTAL: AAR target that supports TensorFlow op execution with TFLite. +aar_with_jni( + name = "tensorflow-lite-flex", + android_library = ":tensorflowlite_flex", +) + android_library( name = "tensorflowlite", - srcs = glob( - [ - "src/main/java/org/tensorflow/lite/*.java", - ], - ), + srcs = JAVA_SRCS, + manifest = "AndroidManifest.xml", + visibility = ["//visibility:public"], + deps = [ + ":tensorflowlite_native", + "@org_checkerframework_qual", + ], +) + +# EXPERIMENTAL: Android target that supports TensorFlow op execution with TFLite. 
+android_library( + name = "tensorflowlite_flex", + srcs = JAVA_SRCS, manifest = "AndroidManifest.xml", visibility = ["//visibility:public"], deps = [ - ":tflite_runtime", + ":tensorflowlite_native_flex", "@org_checkerframework_qual", ], ) android_library( name = "tensorflowlite_java", - srcs = glob( - [ - "src/main/java/org/tensorflow/lite/*.java", - ], - ), + srcs = JAVA_SRCS, visibility = ["//visibility:public"], deps = [ "@org_checkerframework_qual", @@ -50,16 +64,23 @@ android_library( java_library( name = "tensorflowlitelib", - srcs = glob( - [ - "src/main/java/org/tensorflow/lite/*.java", - ], - ), + srcs = JAVA_SRCS, javacopts = JAVACOPTS, visibility = ["//visibility:public"], deps = [ ":libtensorflowlite_jni.so", - "//tensorflow/contrib/lite/java/src/main/native", + "@org_checkerframework_qual", + ], +) + +# EXPERIMENTAL: Java target that supports TensorFlow op execution with TFLite. +java_library( + name = "tensorflowlitelib_flex", + srcs = JAVA_SRCS, + javacopts = JAVACOPTS, + visibility = ["//visibility:public"], + deps = [ + ":libtensorflowlite_flex_jni.so", "@org_checkerframework_qual", ], ) @@ -72,7 +93,6 @@ java_test( tags = ["no_oss"], test_class = "org.tensorflow.lite.TensorFlowLiteTest", deps = [ - ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", @@ -87,7 +107,6 @@ java_test( tags = ["no_oss"], test_class = "org.tensorflow.lite.DataTypeTest", deps = [ - ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", @@ -110,7 +129,6 @@ java_test( tags = ["no_oss"], test_class = "org.tensorflow.lite.NativeInterpreterWrapperTest", deps = [ - ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", @@ -125,19 +143,37 @@ java_test( data = [ "src/testdata/add.bin", "src/testdata/mobilenet.tflite.bin", + "//tensorflow/contrib/lite:testdata/multi_add_flex.bin", ], javacopts = JAVACOPTS, tags = ["no_oss"], test_class = "org.tensorflow.lite.InterpreterTest", visibility = 
["//visibility:private"], deps = [ - ":libtensorflowlite_jni.so", ":tensorflowlitelib", "@com_google_truth", "@junit", ], ) +java_test( + name = "InterpreterFlexTest", + size = "small", + srcs = ["src/test/java/org/tensorflow/lite/InterpreterFlexTest.java"], + data = [ + "//tensorflow/contrib/lite:testdata/multi_add_flex.bin", + ], + javacopts = JAVACOPTS, + tags = ["no_oss"], + test_class = "org.tensorflow.lite.InterpreterFlexTest", + visibility = ["//visibility:private"], + deps = [ + ":tensorflowlitelib_flex", + "@com_google_truth", + "@junit", + ], +) + java_test( name = "TensorTest", size = "small", @@ -164,14 +200,30 @@ filegroup( ) cc_library( - name = "tflite_runtime", + name = "tensorflowlite_native", srcs = ["libtensorflowlite_jni.so"], visibility = ["//visibility:public"], ) +cc_library( + name = "tensorflowlite_native_flex", + srcs = ["libtensorflowlite_flex_jni.so"], + visibility = ["//visibility:public"], +) + tflite_jni_binary( name = "libtensorflowlite_jni.so", deps = [ "//tensorflow/contrib/lite/java/src/main/native", ], ) + +# EXPERIMENTAL: Native target that supports TensorFlow op execution with TFLite. +tflite_jni_binary( + name = "libtensorflowlite_flex_jni.so", + deps = [ + "//tensorflow/contrib/lite/delegates/flex:delegate", + "//tensorflow/contrib/lite/java/src/main/native", + "//tensorflow/contrib/lite/java/src/main/native:init_tensorflow", + ], +) diff --git a/tensorflow/contrib/lite/java/aar_with_jni.bzl b/tensorflow/contrib/lite/java/aar_with_jni.bzl index 9d2aead266f897e8b08520d06ea60654927029e9..360d622b1bcf5cf379987ceefc43c74b1b6ce5fb 100644 --- a/tensorflow/contrib/lite/java/aar_with_jni.bzl +++ b/tensorflow/contrib/lite/java/aar_with_jni.bzl @@ -30,7 +30,10 @@ EOF # In some platforms we don't have an Android SDK/NDK and this target # can't be built. We need to prevent the build system from trying to # use the target in that case. 
- tags = ["manual"], + tags = [ + "manual", + "no_cuda_on_cpu_tap", + ], ) native.genrule( diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java index 711638a9f995ce270cd362b93a7bcfca990430dc..deded1018285f66f848dfb796e3d90506ef37c3f 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/TensorFlowLite.java @@ -18,24 +18,42 @@ package org.tensorflow.lite; /** Static utility methods loading the TensorFlowLite runtime. */ public final class TensorFlowLite { - private static final String LIBNAME = "tensorflowlite_jni"; + private static final String PRIMARY_LIBNAME = "tensorflowlite_jni"; + private static final String FALLBACK_LIBNAME = "tensorflowlite_flex_jni"; private TensorFlowLite() {} /** Returns the version of the underlying TensorFlowLite runtime. */ public static native String version(); + /** + * Initialize tensorflow's libraries. This will throw an exception if used when TensorFlow isn't + * linked in. + */ + static native void initTensorFlow(); + /** * Load the TensorFlowLite runtime C library. */ static boolean init() { + Throwable primaryLibException; try { - System.loadLibrary(LIBNAME); + System.loadLibrary(PRIMARY_LIBNAME); return true; } catch (UnsatisfiedLinkError e) { - System.err.println("TensorFlowLite: failed to load native library: " + e.getMessage()); - return false; + primaryLibException = e; } + + try { + System.loadLibrary(FALLBACK_LIBNAME); + return true; + } catch (UnsatisfiedLinkError e) { + // If the fallback fails, log the error for the primary load instead. 
+ System.err.println( + "TensorFlowLite: failed to load native library: " + primaryLibException.getMessage()); + } + + return false; } static { diff --git a/tensorflow/contrib/lite/java/src/main/native/BUILD b/tensorflow/contrib/lite/java/src/main/native/BUILD index 4b4e1c21d818dc56803ff31d83d19dea2ac08707..f91345f369fe118839cd6e28032b36c346008f58 100644 --- a/tensorflow/contrib/lite/java/src/main/native/BUILD +++ b/tensorflow/contrib/lite/java/src/main/native/BUILD @@ -73,6 +73,37 @@ genrule( cmd = "cp -f $< $@", ) +cc_library( + name = "init_tensorflow", + srcs = [ + "init_tensorflow_jni.cc", + ] + select({ + # The Android toolchain makes "jni.h" available in the include path. + # For non-Android toolchains, generate jni.h and jni_md.h. + "//tensorflow:android": [], + "//conditions:default": [ + ":jni.h", + ":jni_md.h", + ], + }), + hdrs = [ + "init_tensorflow_jni.h", + ], + copts = tflite_copts(), + includes = select({ + "//tensorflow:android": [], + "//conditions:default": ["."], + }), + linkopts = [ + "-lm", + "-ldl", + ], + deps = [ + "//tensorflow/contrib/lite/testing:init_tensorflow", + ], + alwayslink = 1, +) + # This includes all ops. If you want a smaller binary, you should copy and # modify builtin_ops_jni.cc. You should then link your binary against both # ":native_framework_only" and your own version of ":native_builtin_ops". 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_options.cc b/tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.cc similarity index 64% rename from tensorflow/compiler/xla/service/gpu/gpu_options.cc rename to tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.cc index 35b4b4e20b633792de4251a4b0e89f4b579053ce..74aa384df30334ec09cb8e0813d4f6bc94210a4d 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_options.cc +++ b/tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.cc @@ -13,16 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/compiler/xla/service/gpu/gpu_options.h" -#include "tensorflow/core/lib/gtl/map_util.h" +#include "tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.h" +#include "tensorflow/contrib/lite/testing/init_tensorflow.h" -namespace xla { -namespace gpu { - -bool ConvUseLayoutHeuristic(const HloModuleConfig& config) { - return !config.debug_options().xla_backend_extra_options().count( - "xla_gpu_experimental_conv_disable_layout_heuristic"); +JNIEXPORT void JNICALL Java_org_tensorflow_lite_TensorFlowLite_initTensorFlow( + JNIEnv* env, jclass clazz) { + ::tflite::InitTensorFlow(); } - -} // namespace gpu -} // namespace xla diff --git a/tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.h b/tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.h new file mode 100644 index 0000000000000000000000000000000000000000..4689eb05fedcf86e76c9ddef8b58f4fc67ffc5bd --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/native/init_tensorflow_jni.h @@ -0,0 +1,36 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_JAVA_SRC_MAIN_NATIVE_INIT_TENSORFLOW_JNI_H_ +#define TENSORFLOW_CONTRIB_LITE_JAVA_SRC_MAIN_NATIVE_INIT_TENSORFLOW_JNI_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +/* + * Class: org_tensorflow_lite_TensorFlowLite + * Method: initTensorFlow + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_tensorflow_lite_TensorFlowLite_initTensorFlow( + JNIEnv* env, jclass clazz); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // TENSORFLOW_CONTRIB_LITE_JAVA_SRC_MAIN_NATIVE_INIT_TENSORFLOW_JNI_H_ diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java new file mode 100644 index 0000000000000000000000000000000000000000..3b3d9f0e7fc0706c35045b85b316bcb16296cd90 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterFlexTest.java @@ -0,0 +1,50 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite; + +import static com.google.common.truth.Truth.assertThat; + +import java.io.File; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** + * Unit tests for {@link org.tensorflow.lite.Interpreter} that validate execution with models that + * have TensorFlow ops. + */ +@RunWith(JUnit4.class) +public final class InterpreterFlexTest { + + private static final File FLEX_MODEL_FILE = + new File("tensorflow/contrib/lite/testdata/multi_add_flex.bin"); + + /** Smoke test validating that flex model loading works when the flex delegate is linked. */ + @Test + public void testFlexModel() throws Exception { + try (Interpreter interpreter = new Interpreter(FLEX_MODEL_FILE)) { + assertThat(interpreter.getInputTensorCount()).isEqualTo(4); + assertThat(interpreter.getInputTensor(0).dataType()).isEqualTo(DataType.FLOAT32); + assertThat(interpreter.getOutputTensorCount()).isEqualTo(4); + assertThat(interpreter.getOutputTensor(0).dataType()).isEqualTo(DataType.FLOAT32); + interpreter.run(new float[1], new float[1]); + } + } + + static { + TensorFlowLite.initTensorFlow(); + } +} diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java index a98fca01325c73994c28fa96770a04424f7997fe..f8b73c7cf3bc2dcd814dd19924a4d5597a7249b7 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java @@ -43,6 +43,9 @@ public final class InterpreterTest { private static final File MOBILENET_MODEL_FILE = new File("tensorflow/contrib/lite/java/src/testdata/mobilenet.tflite.bin"); + private static final 
File FLEX_MODEL_FILE = + new File("tensorflow/contrib/lite/testdata/multi_add_flex.bin"); + @Test public void testInterpreter() throws Exception { Interpreter interpreter = new Interpreter(MODEL_FILE); @@ -345,4 +348,15 @@ public final class InterpreterTest { interpreter.close(); interpreter.close(); } + + /** Smoke test validating that flex model loading fails when the flex delegate is not linked. */ + @Test + public void testFlexModel() throws Exception { + try { + new Interpreter(FLEX_MODEL_FILE); + fail(); + } catch (IllegalStateException e) { + // Expected failure. + } + } } diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index d2d8073abd3123f6b95f3d39dbd4094fc3366d05..f20bb420a0240b01af9248cd5c5ba60f5e329f55 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -586,6 +586,7 @@ tf_cc_test( ":builtin_ops", "//tensorflow/contrib/lite:framework", "//tensorflow/contrib/lite/kernels:test_util", + "//tensorflow/contrib/lite/schema:schema_fbs", "@com_google_googletest//:gtest", ], ) diff --git a/tensorflow/contrib/lite/kernels/add_test.cc b/tensorflow/contrib/lite/kernels/add_test.cc index 0b5844321133de103919de76d367574f018a6698..261dd36ef0c517bd7880f79948b8ac9682f9bab4 100644 --- a/tensorflow/contrib/lite/kernels/add_test.cc +++ b/tensorflow/contrib/lite/kernels/add_test.cc @@ -108,7 +108,7 @@ TEST(FloatAddOpModel, ActivationRELU_N1_TO_1) { } TEST(FloatAddOpModel, VariousInputShapes) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { FloatAddOpModel m({TensorType_FLOAT32, test_shapes[i]}, @@ -124,7 +124,7 @@ TEST(FloatAddOpModel, VariousInputShapes) { } TEST(FloatAddOpModel, WithBroadcast) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { FloatAddOpModel m({TensorType_FLOAT32, 
test_shapes[i]}, @@ -161,7 +161,7 @@ TEST(IntegerAddOpModel, ActivationRELU_N1_TO_1) { } TEST(IntegerAddOpModel, VariousInputShapes) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { IntegerAddOpModel m({TensorType_INT32, test_shapes[i]}, @@ -176,7 +176,7 @@ TEST(IntegerAddOpModel, VariousInputShapes) { } TEST(IntegerAddOpModel, WithBroadcast) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { IntegerAddOpModel m({TensorType_INT32, test_shapes[i]}, @@ -193,11 +193,11 @@ TEST(IntegerAddOpModel, WithBroadcast) { TEST(QuantizedAddOpModel, QuantizedTestsNoActivation) { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); - std::vector> inputs1 = { + std::vector> inputs1 = { {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}}; - std::vector> inputs2 = { + std::vector> inputs2 = { {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}}; - std::vector> results = { + std::vector> results = { {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}}; for (int i = 0; i < inputs1.size(); ++i) { QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, @@ -217,11 +217,11 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) { const float kMin = -1.f; const float kMax = 32767.f / 32768.f; float kQuantizedTolerance = GetToleranceInt16(kMin, kMax); - std::vector> inputs1 = { + std::vector> inputs1 = { {0.1, 0.2, 0.3, 0.4}, {-0.8, 0.2, 0.4, 0.7}, {-0.8, 0.2, 0.7, 0.3}}; - std::vector> inputs2 = { + std::vector> inputs2 = { {0.6, 0.4, 0.3, 0.1}, {0.6, 0.4, 0.5, -0.8}, {0.6, 0.4, -0.8, 0.5}}; - std::vector> results = { + std::vector> results = { {0.7, 0.6, 0.6, 0.5}, {-0.2, 0.6, 0.9, -0.1}, {-0.2, 0.6, -0.1, 0.8}}; for (int i = 0; i < inputs1.size(); ++i) { QuantizedAddOpModel m({TensorType_INT16, {1, 2, 2, 1}, kMin, kMax}, @@ 
-240,12 +240,12 @@ TEST(QuantizedAddOpModel, QuantizedTestsNoActivationInt16) { TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1) { float kQuantizedTolerance = GetTolerance(-1.0, 1.0); - std::vector> inputs1 = {{-0.8, 0.2, 0.9, 0.7}, - {-0.8, 0.2, 0.7, 0.3}}; - std::vector> inputs2 = {{0.6, 0.4, 0.9, -0.8}, - {0.6, 0.4, -0.8, 0.5}}; - std::vector> results = {{-0.2, 0.6, 1.0, -0.1}, - {-0.2, 0.6, -0.1, 0.8}}; + std::vector> inputs1 = {{-0.8, 0.2, 0.9, 0.7}, + {-0.8, 0.2, 0.7, 0.3}}; + std::vector> inputs2 = {{0.6, 0.4, 0.9, -0.8}, + {0.6, 0.4, -0.8, 0.5}}; + std::vector> results = {{-0.2, 0.6, 1.0, -0.1}, + {-0.2, 0.6, -0.1, 0.8}}; for (int i = 0; i < inputs1.size(); ++i) { QuantizedAddOpModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, @@ -262,7 +262,7 @@ TEST(QuantizedAddOpModel, QuantizedTestsActivationRELU_N1_TO_1) { TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) { float kQuantizedTolerance = GetTolerance(-3.0, 3.0); - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { QuantizedAddOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, @@ -281,7 +281,7 @@ TEST(QuantizedAddOpModel, QuantizedVariousInputShapes) { TEST(QuantizedAddOpModel, QuantizedWithBroadcast) { float kQuantizedTolerance = GetTolerance(-3.0, 3.0); - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { QuantizedAddOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, diff --git a/tensorflow/contrib/lite/kernels/basic_rnn.cc b/tensorflow/contrib/lite/kernels/basic_rnn.cc index 1aa27602e5e40ca7607488ad9ae5929410266329..53f524cc6cf303ef0ee91ec7350aa098743eb96d 100644 --- a/tensorflow/contrib/lite/kernels/basic_rnn.cc +++ b/tensorflow/contrib/lite/kernels/basic_rnn.cc @@ -114,9 +114,10 @@ TfLiteStatus Prepare(TfLiteContext* context, 
TfLiteNode* node) { TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/2); scaling_factors->type = kTfLiteFloat32; scaling_factors->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); - scaling_factors_size->data[0] = batch_size; - if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) { + int scaling_dims[1] = {batch_size}; + if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) { + TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); + scaling_factors_size->data[0] = batch_size; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors, scaling_factors_size)); } @@ -133,6 +134,8 @@ TfLiteStatus EvalFloat(const TfLiteTensor* input, const int batch_size = input->dims->data[0]; const int num_units = input_weights->dims->data[0]; const int input_size = input->dims->data[1]; + const int output_batch_leading_dim = + output->dims->data[output->dims->size - 1]; // Initialize the pointer to hidden state. 
float* hidden_state_ptr_batch = hidden_state->data.f; @@ -144,10 +147,10 @@ TfLiteStatus EvalFloat(const TfLiteTensor* input, const float* recurrent_weights_ptr = recurrent_weights->data.f; const float* bias_ptr = bias->data.f; - kernel_utils::RnnBatchStep(input_ptr_batch, input_weights_ptr, - recurrent_weights_ptr, bias_ptr, input_size, - num_units, batch_size, params->activation, - hidden_state_ptr_batch, output_ptr_batch); + kernel_utils::RnnBatchStep( + input_ptr_batch, input_weights_ptr, recurrent_weights_ptr, bias_ptr, + input_size, num_units, batch_size, output_batch_leading_dim, + params->activation, hidden_state_ptr_batch, output_ptr_batch); return kTfLiteOk; } @@ -162,6 +165,8 @@ TfLiteStatus EvalHybrid(const TfLiteTensor* input, const int batch_size = input->dims->data[0]; const int num_units = input_weights->dims->data[0]; const int input_size = input->dims->data[1]; + const int output_batch_leading_dim = + output->dims->data[output->dims->size - 1]; // Initialize the pointer to hidden state. 
float* hidden_state_ptr_batch = hidden_state->data.f; @@ -187,9 +192,9 @@ TfLiteStatus EvalHybrid(const TfLiteTensor* input, kernel_utils::RnnBatchStep( input_ptr_batch, input_weights_ptr, input_weights_scale, recurrent_weights_ptr, recurrent_weights_scale, bias_ptr, input_size, - num_units, batch_size, params->activation, quantized_input_ptr, - quantized_hidden_state_ptr, scaling_factors_ptr, hidden_state_ptr_batch, - output_ptr_batch); + num_units, batch_size, output_batch_leading_dim, params->activation, + quantized_input_ptr, quantized_hidden_state_ptr, scaling_factors_ptr, + hidden_state_ptr_batch, output_ptr_batch); return kTfLiteOk; } diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc index a326827b1e4654731562b987f3a0704ee9e15279..5325507d960d1a981b05f01e58da61fb2b9f9740 100644 --- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc +++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc @@ -129,14 +129,14 @@ enum TemporaryTensor { kBwScratchBuffer = 1, // Quantized tensors needed for the hybrid kernel. kInputQuantized = 2, - kAuxInputQuantized = 3, // Quantized tensor needed for auxiliary input. - kFwActivationStateQuantized = 4, - kBwActivationStateQuantized = 5, - kFwCellStateQuantized = 6, - kBwCellStateQuantized = 7, - kScalingFactors = 8, - kProductScalingFactors = 9, - kRecoveredCellWeights = 10, + kFwActivationStateQuantized = 3, + kBwActivationStateQuantized = 4, + kFwCellStateQuantized = 5, + kBwCellStateQuantized = 6, + kScalingFactors = 7, + kProductScalingFactors = 8, + kRecoveredCellWeights = 9, + kAuxInputQuantized = 10, // Optional, quantized tensor for auxiliary input. kNumTemporaryTensors = 11 }; @@ -151,7 +151,7 @@ void Free(TfLiteContext* context, void* buffer) { } // Check that input tensor dimensions matches with each other. 
-TfLiteStatus CheckLstmTensorDimensions( +TfLiteStatus CheckLstmTensorDimensionsAndTypes( TfLiteContext* context, TfLiteNode* node, int n_input, int n_output, int n_cell, int input_to_input_weights_tensor, int input_to_forget_weights_tensor, int input_to_cell_weights_tensor, @@ -172,25 +172,39 @@ TfLiteStatus CheckLstmTensorDimensions( TF_LITE_ENSURE(context, params->cell_clip >= 0); TF_LITE_ENSURE(context, params->proj_clip >= 0); + const TfLiteTensor* input_to_forget_weights = + GetInput(context, node, input_to_forget_weights_tensor); + TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->size, 2); + TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[1], n_input); + TF_LITE_ENSURE(context, (input_to_forget_weights->type == kTfLiteFloat32) || + (input_to_forget_weights->type == kTfLiteUInt8)); + const TfLiteTensor* input_to_input_weights = GetOptionalInputTensor(context, node, input_to_input_weights_tensor); if (input_to_input_weights) { TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[0], n_cell); TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->data[1], n_input); + TF_LITE_ENSURE_EQ(context, input_to_input_weights->type, + input_to_forget_weights->type); } - const TfLiteTensor* input_to_forget_weights = - GetInput(context, node, input_to_forget_weights_tensor); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->size, 2); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[0], n_cell); - TF_LITE_ENSURE_EQ(context, input_to_forget_weights->dims->data[1], n_input); - const TfLiteTensor* input_to_cell_weights = GetInput(context, node, input_to_cell_weights_tensor); TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[0], n_cell); TF_LITE_ENSURE_EQ(context, 
input_to_cell_weights->dims->data[1], n_input); + TF_LITE_ENSURE_EQ(context, input_to_cell_weights->type, + input_to_forget_weights->type); + + const TfLiteTensor* input_to_output_weights = + GetInput(context, node, input_to_output_weights_tensor); + TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->size, 2); + TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, input_to_output_weights->dims->data[1], n_input); + TF_LITE_ENSURE_EQ(context, input_to_output_weights->type, + input_to_forget_weights->type); const TfLiteTensor* recurrent_to_input_weights = GetOptionalInputTensor(context, node, recurrent_to_input_weights_tensor); @@ -200,6 +214,8 @@ TfLiteStatus CheckLstmTensorDimensions( n_cell); TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->data[1], n_output); + TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->type, + input_to_forget_weights->type); } const TfLiteTensor* recurrent_to_forget_weights = @@ -209,6 +225,8 @@ TfLiteStatus CheckLstmTensorDimensions( n_cell); TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->dims->data[1], n_output); + TF_LITE_ENSURE_EQ(context, recurrent_to_forget_weights->type, + input_to_forget_weights->type); const TfLiteTensor* recurrent_to_cell_weights = GetInput(context, node, recurrent_to_cell_weights_tensor); @@ -216,6 +234,8 @@ TfLiteStatus CheckLstmTensorDimensions( TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[0], n_cell); TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->dims->data[1], n_output); + TF_LITE_ENSURE_EQ(context, recurrent_to_cell_weights->type, + input_to_forget_weights->type); // We make sure the input-gate's parameters are either both present (regular // LSTM) or not at all (CIFG-LSTM). 
@@ -231,6 +251,8 @@ TfLiteStatus CheckLstmTensorDimensions( if (cell_to_input_weights) { TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1); TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, cell_to_input_weights->type, + input_to_forget_weights->type); } const TfLiteTensor* cell_to_forget_weights = @@ -238,6 +260,8 @@ TfLiteStatus CheckLstmTensorDimensions( if (cell_to_forget_weights) { TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1); TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->type, + input_to_forget_weights->type); } const TfLiteTensor* cell_to_output_weights = @@ -245,6 +269,8 @@ TfLiteStatus CheckLstmTensorDimensions( if (cell_to_output_weights) { TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1); TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, cell_to_output_weights->type, + input_to_forget_weights->type); } // Making sure the peephole weights are there all or none. 
@@ -266,22 +292,26 @@ TfLiteStatus CheckLstmTensorDimensions( } else { TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->size, 1); TF_LITE_ENSURE_EQ(context, input_gate_bias->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, input_gate_bias->type, kTfLiteFloat32); } const TfLiteTensor* forget_gate_bias = GetInput(context, node, forget_gate_bias_tensor); TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->size, 1); TF_LITE_ENSURE_EQ(context, forget_gate_bias->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, forget_gate_bias->type, kTfLiteFloat32); const TfLiteTensor* cell_bias = GetInput(context, node, cell_gate_bias_tensor); TF_LITE_ENSURE_EQ(context, cell_bias->dims->size, 1); TF_LITE_ENSURE_EQ(context, cell_bias->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, cell_bias->type, kTfLiteFloat32); const TfLiteTensor* output_gate_bias = GetInput(context, node, output_gate_bias_tensor); TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->size, 1); TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->data[0], n_cell); + TF_LITE_ENSURE_EQ(context, output_gate_bias->type, kTfLiteFloat32); const TfLiteTensor* projection_weights = GetOptionalInputTensor(context, node, projection_weights_tensor); @@ -289,6 +319,8 @@ TfLiteStatus CheckLstmTensorDimensions( TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[0], n_output); TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell); + TF_LITE_ENSURE_EQ(context, projection_weights->type, + input_to_forget_weights->type); } const TfLiteTensor* projection_bias = @@ -296,6 +328,7 @@ TfLiteStatus CheckLstmTensorDimensions( if (projection_bias) { TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1); TF_LITE_ENSURE_EQ(context, projection_bias->dims->data[0], n_output); + TF_LITE_ENSURE_EQ(context, projection_bias->type, kTfLiteFloat32); } // Making sure the projection tensors are consistent: @@ -315,7 +348,7 @@ TfLiteStatus 
CheckInputTensorDimensions(TfLiteContext* context, int n_output, int n_cell) { TF_LITE_ENSURE_OK( context, - CheckLstmTensorDimensions( + CheckLstmTensorDimensionsAndTypes( context, node, n_input, n_output, n_cell, kFwInputToInputWeightsTensor, kFwInputToForgetWeightsTensor, kFwInputToCellWeightsTensor, kFwInputToOutputWeightsTensor, @@ -329,7 +362,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, TF_LITE_ENSURE_OK( context, - CheckLstmTensorDimensions( + CheckLstmTensorDimensionsAndTypes( context, node, n_input, n_output, n_cell, kBwInputToInputWeightsTensor, kBwInputToForgetWeightsTensor, kBwInputToCellWeightsTensor, kBwInputToOutputWeightsTensor, @@ -379,12 +412,16 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->dims->data[1], n_input); + TF_LITE_ENSURE_EQ(context, bw_input_to_output_weights->type, + fw_input_to_output_weights->type); const TfLiteTensor* fw_recurrent_to_output_weights = GetInput(context, node, kFwRecurrentToOutputWeightsTensor); TF_LITE_ENSURE_EQ(context, fw_recurrent_to_output_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, fw_recurrent_to_output_weights->dims->data[0], n_fw_cell); + TF_LITE_ENSURE_EQ(context, fw_recurrent_to_output_weights->type, + fw_input_to_output_weights->type); const int n_fw_output = fw_recurrent_to_output_weights->dims->data[1]; const TfLiteTensor* bw_recurrent_to_output_weights = @@ -392,6 +429,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->dims->size, 2); TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->dims->data[0], n_bw_cell); + TF_LITE_ENSURE_EQ(context, bw_recurrent_to_output_weights->type, + fw_input_to_output_weights->type); const int n_bw_output = bw_recurrent_to_output_weights->dims->data[1]; // Check that input tensor dimensions matches with each 
other. @@ -469,7 +508,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteIntArrayFree(node->temporaries); if (is_hybrid_op) { - node->temporaries = TfLiteIntArrayCreate(kNumTemporaryTensors); + node->temporaries = TfLiteIntArrayCreate( + has_aux_input ? kNumTemporaryTensors : kNumTemporaryTensors - 1); } else { node->temporaries = TfLiteIntArrayCreate(2); // the two scratch buffers. } @@ -482,11 +522,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* fw_input_to_input_weights = GetOptionalInputTensor(context, node, kFwInputToInputWeightsTensor); - if (has_aux_input) { + const bool fw_use_cifg = (fw_input_to_input_weights == nullptr); + if (has_aux_input && !fw_use_cifg) { TF_LITE_ENSURE_EQ(context, fw_aux_input_to_input_weights->dims->data[0], fw_input_to_input_weights->dims->data[0]); } - const bool fw_use_cifg = (fw_input_to_input_weights == nullptr); TfLiteIntArray* fw_scratch_buffer_size = TfLiteIntArrayCreate(2); fw_scratch_buffer_size->data[0] = n_batch; if (fw_use_cifg) { @@ -539,11 +579,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const TfLiteTensor* bw_input_to_input_weights = GetOptionalInputTensor(context, node, kBwInputToInputWeightsTensor); - if (has_aux_input) { + const bool bw_use_cifg = (bw_input_to_input_weights == nullptr); + if (has_aux_input && !bw_use_cifg) { TF_LITE_ENSURE_EQ(context, bw_aux_input_to_input_weights->dims->data[0], bw_input_to_input_weights->dims->data[0]); } - const bool bw_use_cifg = (bw_input_to_input_weights == nullptr); TfLiteIntArray* bw_scratch_buffer_size = TfLiteIntArrayCreate(2); bw_scratch_buffer_size->data[0] = n_batch; if (bw_use_cifg) { @@ -570,22 +610,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { input_quantized_size)); } - if (has_aux_input) { - node->temporaries->data[kAuxInputQuantized] = - *scratch_tensor_index + kAuxInputQuantized; - TfLiteTensor* aux_input_quantized = - GetTemporary(context, 
node, kAuxInputQuantized); - aux_input_quantized->type = kTfLiteUInt8; - aux_input_quantized->allocation_type = kTfLiteArenaRw; - if (!TfLiteIntArrayEqual(aux_input_quantized->dims, aux_input->dims)) { - TfLiteIntArray* aux_input_quantized_size = - TfLiteIntArrayCopy(aux_input->dims); - TF_LITE_ENSURE_OK(context, - context->ResizeTensor(context, aux_input_quantized, - aux_input_quantized_size)); - } - } - node->temporaries->data[kFwActivationStateQuantized] = *scratch_tensor_index + kFwActivationStateQuantized; TfLiteTensor* fw_activation_state_quantized = @@ -654,9 +678,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { GetTemporary(context, node, kScalingFactors); scaling_factors->type = kTfLiteFloat32; scaling_factors->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); - scaling_factors_size->data[0] = n_batch; - if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) { + int scaling_dims[1] = {n_batch}; + if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) { + TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); + scaling_factors_size->data[0] = n_batch; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors, scaling_factors_size)); } @@ -666,10 +691,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { GetTemporary(context, node, kProductScalingFactors); prod_scaling_factors->type = kTfLiteFloat32; prod_scaling_factors->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1); - prod_scaling_factors_size->data[0] = n_batch; - if (!TfLiteIntArrayEqual(prod_scaling_factors->dims, - prod_scaling_factors_size)) { + if (!TfLiteIntArrayEqualsArray(prod_scaling_factors->dims, 1, + scaling_dims)) { + TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1); + prod_scaling_factors_size->data[0] = n_batch; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, 
prod_scaling_factors, prod_scaling_factors_size)); @@ -683,14 +708,33 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { GetTemporary(context, node, kRecoveredCellWeights); recovered_cell_weights->type = kTfLiteFloat32; recovered_cell_weights->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* recovered_cell_weights_size = TfLiteIntArrayCreate(1); - recovered_cell_weights_size->data[0] = n_fw_cell; - if (!TfLiteIntArrayEqual(recovered_cell_weights->dims, - recovered_cell_weights_size)) { + int recovered_cell_dims[1] = {n_fw_cell}; + if (!TfLiteIntArrayEqualsArray(recovered_cell_weights->dims, 1, + recovered_cell_dims)) { + TfLiteIntArray* recovered_cell_weights_size = TfLiteIntArrayCreate(1); + recovered_cell_weights_size->data[0] = n_fw_cell; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, recovered_cell_weights, recovered_cell_weights_size)); } + + // Only allocate a temporary tensor for quantized auxiliary input if we are + // actually going to use it. + if (has_aux_input) { + node->temporaries->data[kAuxInputQuantized] = + *scratch_tensor_index + kAuxInputQuantized; + TfLiteTensor* aux_input_quantized = + GetTemporary(context, node, kAuxInputQuantized); + aux_input_quantized->type = kTfLiteUInt8; + aux_input_quantized->allocation_type = kTfLiteArenaRw; + if (!TfLiteIntArrayEqual(aux_input_quantized->dims, aux_input->dims)) { + TfLiteIntArray* aux_input_quantized_size = + TfLiteIntArrayCopy(aux_input->dims); + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, aux_input_quantized, + aux_input_quantized_size)); + } + } } return kTfLiteOk; } @@ -868,8 +912,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { case kTfLiteUInt8: { TfLiteTensor* input_quantized = GetTemporary(context, node, kInputQuantized); - TfLiteTensor* aux_input_quantized = - GetTemporary(context, node, kAuxInputQuantized); TfLiteTensor* fw_activation_state_quantized = GetTemporary(context, node, kFwActivationStateQuantized); TfLiteTensor* 
bw_activation_state_quantized = @@ -884,6 +926,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { GetTemporary(context, node, kProductScalingFactors); TfLiteTensor* recovered_cell_weights = GetTemporary(context, node, kRecoveredCellWeights); + TfLiteTensor* aux_input_quantized = + (aux_input == nullptr) + ? nullptr + : GetTemporary(context, node, kAuxInputQuantized); TfLiteStatus fw_pass_status = lstm_eval::EvalHybrid( input, fw_input_to_input_weights, fw_input_to_forget_weights, diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc index 9cc04907e1e7e710ba5d1e883ca2e784f89184d6..db98d6c49d42ac9991d2512de54a60c093cf5799 100644 --- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc +++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm_test.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ // Unit test for TFLite Bidirectional LSTM op. +#include #include #include #include @@ -24,6 +25,7 @@ limitations under the License. 
#include "tensorflow/contrib/lite/kernels/register.h" #include "tensorflow/contrib/lite/kernels/test_util.h" #include "tensorflow/contrib/lite/model.h" +#include "tensorflow/contrib/lite/schema/schema_generated.h" namespace tflite { namespace { @@ -37,6 +39,7 @@ class BidirectionalLSTMOpModel : public SingleOpModel { bool use_peephole, bool use_projection_weights, bool use_projection_bias, bool merge_outputs, float cell_clip, float proj_clip, + bool quantize_weights, const std::vector>& input_shapes) : n_batch_(n_batch), n_input_(n_input), @@ -44,37 +47,40 @@ class BidirectionalLSTMOpModel : public SingleOpModel { n_bw_cell_(n_cell), n_fw_output_(n_output), n_bw_output_(n_output), - sequence_length_(sequence_length) { + sequence_length_(sequence_length), + quantize_weights_(quantize_weights) { input_ = AddInput(TensorType_FLOAT32); + const auto weight_type = + quantize_weights_ ? TensorType_UINT8 : TensorType_FLOAT32; if (use_cifg) { fw_input_to_input_weights_ = AddNullInput(); } else { - fw_input_to_input_weights_ = AddInput(TensorType_FLOAT32); + fw_input_to_input_weights_ = AddInput(weight_type); } - fw_input_to_forget_weights_ = AddInput(TensorType_FLOAT32); - fw_input_to_cell_weights_ = AddInput(TensorType_FLOAT32); - fw_input_to_output_weights_ = AddInput(TensorType_FLOAT32); + fw_input_to_forget_weights_ = AddInput(weight_type); + fw_input_to_cell_weights_ = AddInput(weight_type); + fw_input_to_output_weights_ = AddInput(weight_type); if (use_cifg) { fw_recurrent_to_input_weights_ = AddNullInput(); } else { - fw_recurrent_to_input_weights_ = AddInput(TensorType_FLOAT32); + fw_recurrent_to_input_weights_ = AddInput(weight_type); } - fw_recurrent_to_forget_weights_ = AddInput(TensorType_FLOAT32); - fw_recurrent_to_cell_weights_ = AddInput(TensorType_FLOAT32); - fw_recurrent_to_output_weights_ = AddInput(TensorType_FLOAT32); + fw_recurrent_to_forget_weights_ = AddInput(weight_type); + fw_recurrent_to_cell_weights_ = AddInput(weight_type); + 
fw_recurrent_to_output_weights_ = AddInput(weight_type); if (use_peephole) { if (use_cifg) { fw_cell_to_input_weights_ = AddNullInput(); } else { - fw_cell_to_input_weights_ = AddInput(TensorType_FLOAT32); + fw_cell_to_input_weights_ = AddInput(weight_type); } - fw_cell_to_forget_weights_ = AddInput(TensorType_FLOAT32); - fw_cell_to_output_weights_ = AddInput(TensorType_FLOAT32); + fw_cell_to_forget_weights_ = AddInput(weight_type); + fw_cell_to_output_weights_ = AddInput(weight_type); } else { fw_cell_to_input_weights_ = AddNullInput(); fw_cell_to_forget_weights_ = AddNullInput(); @@ -105,31 +111,31 @@ class BidirectionalLSTMOpModel : public SingleOpModel { if (use_cifg) { bw_input_to_input_weights_ = AddNullInput(); } else { - bw_input_to_input_weights_ = AddInput(TensorType_FLOAT32); + bw_input_to_input_weights_ = AddInput(weight_type); } - bw_input_to_forget_weights_ = AddInput(TensorType_FLOAT32); - bw_input_to_cell_weights_ = AddInput(TensorType_FLOAT32); - bw_input_to_output_weights_ = AddInput(TensorType_FLOAT32); + bw_input_to_forget_weights_ = AddInput(weight_type); + bw_input_to_cell_weights_ = AddInput(weight_type); + bw_input_to_output_weights_ = AddInput(weight_type); if (use_cifg) { bw_recurrent_to_input_weights_ = AddNullInput(); } else { - bw_recurrent_to_input_weights_ = AddInput(TensorType_FLOAT32); + bw_recurrent_to_input_weights_ = AddInput(weight_type); } - bw_recurrent_to_forget_weights_ = AddInput(TensorType_FLOAT32); - bw_recurrent_to_cell_weights_ = AddInput(TensorType_FLOAT32); - bw_recurrent_to_output_weights_ = AddInput(TensorType_FLOAT32); + bw_recurrent_to_forget_weights_ = AddInput(weight_type); + bw_recurrent_to_cell_weights_ = AddInput(weight_type); + bw_recurrent_to_output_weights_ = AddInput(weight_type); if (use_peephole) { if (use_cifg) { bw_cell_to_input_weights_ = AddNullInput(); } else { - bw_cell_to_input_weights_ = AddInput(TensorType_FLOAT32); + bw_cell_to_input_weights_ = AddInput(weight_type); } - 
bw_cell_to_forget_weights_ = AddInput(TensorType_FLOAT32); - bw_cell_to_output_weights_ = AddInput(TensorType_FLOAT32); + bw_cell_to_forget_weights_ = AddInput(weight_type); + bw_cell_to_output_weights_ = AddInput(weight_type); } else { bw_cell_to_input_weights_ = AddNullInput(); bw_cell_to_forget_weights_ = AddNullInput(); @@ -146,7 +152,7 @@ class BidirectionalLSTMOpModel : public SingleOpModel { bw_output_gate_bias_ = AddInput(TensorType_FLOAT32); if (use_projection_weights) { - bw_projection_weights_ = AddInput(TensorType_FLOAT32); + bw_projection_weights_ = AddInput(weight_type); if (use_projection_bias) { bw_projection_bias_ = AddInput(TensorType_FLOAT32); } else { @@ -198,88 +204,96 @@ class BidirectionalLSTMOpModel : public SingleOpModel { BuildInterpreter(input_shapes); } + void PopulateWeightTensor(int tensor_id, const std::vector& f) { + if (quantize_weights_) { + SymmetricQuantizeAndPopulate(tensor_id, f); + } else { + PopulateTensor(tensor_id, f); + } + } + // Set weights in forward and backward cells to be the same. 
- void SetInputToInputWeights(std::initializer_list f) { - PopulateTensor(fw_input_to_input_weights_, f); - PopulateTensor(bw_input_to_input_weights_, f); + void SetInputToInputWeights(const std::vector& f) { + PopulateWeightTensor(fw_input_to_input_weights_, f); + PopulateWeightTensor(bw_input_to_input_weights_, f); } - void SetInputToForgetWeights(std::initializer_list f) { - PopulateTensor(fw_input_to_forget_weights_, f); - PopulateTensor(bw_input_to_forget_weights_, f); + void SetInputToForgetWeights(const std::vector& f) { + PopulateWeightTensor(fw_input_to_forget_weights_, f); + PopulateWeightTensor(bw_input_to_forget_weights_, f); } - void SetInputToCellWeights(std::initializer_list f) { - PopulateTensor(fw_input_to_cell_weights_, f); - PopulateTensor(bw_input_to_cell_weights_, f); + void SetInputToCellWeights(const std::vector& f) { + PopulateWeightTensor(fw_input_to_cell_weights_, f); + PopulateWeightTensor(bw_input_to_cell_weights_, f); } - void SetInputToOutputWeights(std::initializer_list f) { - PopulateTensor(fw_input_to_output_weights_, f); - PopulateTensor(bw_input_to_output_weights_, f); + void SetInputToOutputWeights(const std::vector& f) { + PopulateWeightTensor(fw_input_to_output_weights_, f); + PopulateWeightTensor(bw_input_to_output_weights_, f); } - void SetRecurrentToInputWeights(std::initializer_list f) { - PopulateTensor(fw_recurrent_to_input_weights_, f); - PopulateTensor(bw_recurrent_to_input_weights_, f); + void SetRecurrentToInputWeights(const std::vector& f) { + PopulateWeightTensor(fw_recurrent_to_input_weights_, f); + PopulateWeightTensor(bw_recurrent_to_input_weights_, f); } - void SetRecurrentToForgetWeights(std::initializer_list f) { - PopulateTensor(fw_recurrent_to_forget_weights_, f); - PopulateTensor(bw_recurrent_to_forget_weights_, f); + void SetRecurrentToForgetWeights(const std::vector& f) { + PopulateWeightTensor(fw_recurrent_to_forget_weights_, f); + PopulateWeightTensor(bw_recurrent_to_forget_weights_, f); } - void 
SetRecurrentToCellWeights(std::initializer_list f) { - PopulateTensor(fw_recurrent_to_cell_weights_, f); - PopulateTensor(bw_recurrent_to_cell_weights_, f); + void SetRecurrentToCellWeights(const std::vector& f) { + PopulateWeightTensor(fw_recurrent_to_cell_weights_, f); + PopulateWeightTensor(bw_recurrent_to_cell_weights_, f); } - void SetRecurrentToOutputWeights(std::initializer_list f) { - PopulateTensor(fw_recurrent_to_output_weights_, f); - PopulateTensor(bw_recurrent_to_output_weights_, f); + void SetRecurrentToOutputWeights(const std::vector& f) { + PopulateWeightTensor(fw_recurrent_to_output_weights_, f); + PopulateWeightTensor(bw_recurrent_to_output_weights_, f); } - void SetCellToInputWeights(std::initializer_list f) { - PopulateTensor(fw_cell_to_input_weights_, f); - PopulateTensor(bw_cell_to_input_weights_, f); + void SetCellToInputWeights(const std::vector& f) { + PopulateWeightTensor(fw_cell_to_input_weights_, f); + PopulateWeightTensor(bw_cell_to_input_weights_, f); } - void SetCellToForgetWeights(std::initializer_list f) { - PopulateTensor(fw_cell_to_forget_weights_, f); - PopulateTensor(bw_cell_to_forget_weights_, f); + void SetCellToForgetWeights(const std::vector& f) { + PopulateWeightTensor(fw_cell_to_forget_weights_, f); + PopulateWeightTensor(bw_cell_to_forget_weights_, f); } - void SetCellToOutputWeights(std::initializer_list f) { - PopulateTensor(fw_cell_to_output_weights_, f); - PopulateTensor(bw_cell_to_output_weights_, f); + void SetCellToOutputWeights(const std::vector& f) { + PopulateWeightTensor(fw_cell_to_output_weights_, f); + PopulateWeightTensor(bw_cell_to_output_weights_, f); } - void SetInputGateBias(std::initializer_list f) { + void SetInputGateBias(const std::vector& f) { PopulateTensor(fw_input_gate_bias_, f); PopulateTensor(bw_input_gate_bias_, f); } - void SetForgetGateBias(std::initializer_list f) { + void SetForgetGateBias(const std::vector& f) { PopulateTensor(fw_forget_gate_bias_, f); PopulateTensor(bw_forget_gate_bias_, 
f); } - void SetCellBias(std::initializer_list f) { + void SetCellBias(const std::vector& f) { PopulateTensor(fw_cell_bias_, f); PopulateTensor(bw_cell_bias_, f); } - void SetOutputGateBias(std::initializer_list f) { + void SetOutputGateBias(const std::vector& f) { PopulateTensor(fw_output_gate_bias_, f); PopulateTensor(bw_output_gate_bias_, f); } - void SetProjectionWeights(std::initializer_list f) { - PopulateTensor(fw_projection_weights_, f); - PopulateTensor(bw_projection_weights_, f); + void SetProjectionWeights(const std::vector& f) { + PopulateWeightTensor(fw_projection_weights_, f); + PopulateWeightTensor(bw_projection_weights_, f); } - void SetProjectionBias(std::initializer_list f) { + void SetProjectionBias(const std::vector& f) { PopulateTensor(fw_projection_bias_, f); PopulateTensor(bw_projection_bias_, f); } @@ -370,21 +384,30 @@ class BidirectionalLSTMOpModel : public SingleOpModel { int n_fw_output_; int n_bw_output_; int sequence_length_; + + bool quantize_weights_; }; -TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) { +// Declare LSTMOpTest as a parameterized test, where the parameter is a boolean +// indicating whether to use quantization or not. +class LSTMOpTest : public ::testing::TestWithParam {}; + +INSTANTIATE_TEST_CASE_P(QuantizationOrNot, LSTMOpTest, ::testing::Bool()); + +TEST_P(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) { const int n_batch = 1; const int n_input = 2; // n_cell and n_output have the same size when there is no projection. 
const int n_cell = 4; const int n_output = 4; const int sequence_length = 3; + const bool quantize_weights = GetParam(); BidirectionalLSTMOpModel lstm( n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/false, /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0, - /*proj_clip=*/0.0, + /*proj_clip=*/0.0, quantize_weights, { {sequence_length, n_batch, n_input}, // input tensor @@ -502,9 +525,8 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) { -0.03716109, 0.12507336, 0.41193449, -0.20860538, -0.15053082, 0.09120187, 0.24278517, -0.12222792}; static float lstm_bw_golden_output[] = { - -0.0806187, 0.139077, 0.400476, -0.197842, - -0.0332076, 0.123838, 0.309777, -0.17621, - -0.0490733, 0.0739237, 0.067706, -0.0208124}; + -0.0806187, 0.139077, 0.400476, -0.197842, -0.0332076, 0.123838, + 0.309777, -0.17621, -0.0490733, 0.0739237, 0.067706, -0.0208124}; float* batch0_start = lstm_input; float* batch0_end = batch0_start + lstm.num_inputs() * lstm.sequence_length(); @@ -519,7 +541,8 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) { std::vector fw_expected; fw_expected.insert(fw_expected.end(), fw_golden_start, fw_golden_end); EXPECT_THAT(lstm.GetFwOutput(), - ElementsAreArray(ArrayFloatNear(fw_expected))); + ElementsAreArray( + ArrayFloatNear(fw_expected, quantize_weights ? 1e-2 : 1e-5))); float* bw_golden_start = lstm_bw_golden_output; float* bw_golden_end = @@ -527,23 +550,26 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClipping) { std::vector bw_expected; bw_expected.insert(bw_expected.end(), bw_golden_start, bw_golden_end); EXPECT_THAT(lstm.GetBwOutput(), - ElementsAreArray(ArrayFloatNear(bw_expected))); + ElementsAreArray( + ArrayFloatNear(bw_expected, quantize_weights ? 1e-2 : 1e-5))); } -// Same as the previous test, yet with a single merged output tensor. 
-TEST(LSTMOpTest, BlackBoxTestMergedOutput) { - const int n_batch = 1; +// Same as the previous test, yet with a single merged output tensor and n_batch +// of 2. +TEST_P(LSTMOpTest, BlackBoxTestMergedOutput) { + const int n_batch = 2; const int n_input = 2; // n_cell and n_output have the same size when there is no projection. const int n_cell = 4; const int n_output = 4; const int sequence_length = 3; + const bool quantize_weights = GetParam(); BidirectionalLSTMOpModel lstm( n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/false, /*use_projection_bias=*/false, /*merge_outputs=*/true, /*cell_clip=*/0.0, - /*proj_clip=*/0.0, + /*proj_clip=*/0.0, quantize_weights, { {sequence_length, n_batch, n_input}, // input tensor @@ -655,24 +681,29 @@ TEST(LSTMOpTest, BlackBoxTestMergedOutput) { -0.51818722, -0.15390486, 0.0468148, 0.39922136}); // Input should have n_input * sequence_length many values. - static float lstm_input[] = {2., 3., 3., 4., 1., 1.}; + static float lstm_input[] = {2., 3., 2., 3., 3., 4., 3., 4., 1., 1., 1., 1.}; static float lstm_fw_golden_output[] = { - -0.02973187, 0.1229473, 0.20885126, -0.15358765, - -0.03716109, 0.12507336, 0.41193449, -0.20860538, - -0.15053082, 0.09120187, 0.24278517, -0.12222792}; + -0.02973187, 0.1229473, 0.20885126, -0.15358765, -0.02973187, + 0.1229473, 0.20885126, -0.15358765, -0.03716109, 0.12507336, + 0.41193449, -0.20860538, -0.03716109, 0.12507336, 0.41193449, + -0.20860538, -0.15053082, 0.09120187, 0.24278517, -0.12222792, + -0.15053082, 0.09120187, 0.24278517, -0.12222792}; static float lstm_bw_golden_output[] = { - -0.0806187, 0.139077, 0.400476, -0.197842, -0.0332076, 0.123838, - 0.309777, -0.17621, -0.0490733, 0.0739237, 0.067706, -0.0208124}; + -0.0806187, 0.139077, 0.400476, -0.197842, -0.0806187, 0.139077, + 0.400476, -0.197842, -0.0332076, 0.123838, 0.309777, -0.17621, + -0.0332076, 0.123838, 0.309777, -0.17621, -0.0490733, 0.0739237, + 
0.067706, -0.0208124, -0.0490733, 0.0739237, 0.067706, -0.0208124}; float* batch0_start = lstm_input; - float* batch0_end = batch0_start + lstm.num_inputs() * lstm.sequence_length(); + float* batch0_end = batch0_start + lstm.num_inputs() * lstm.num_batches() * + lstm.sequence_length(); lstm.SetInput(0, batch0_start, batch0_end); lstm.Invoke(); std::vector merged_expected; - for (int k = 0; k < lstm.sequence_length(); k++) { + for (int k = 0; k < lstm.sequence_length() * lstm.num_batches(); k++) { merged_expected.insert( merged_expected.end(), lstm_fw_golden_output + k * lstm.num_fw_outputs(), @@ -683,7 +714,8 @@ TEST(LSTMOpTest, BlackBoxTestMergedOutput) { lstm_bw_golden_output + (k + 1) * lstm.num_bw_outputs()); } EXPECT_THAT(lstm.GetFwOutput(), - ElementsAreArray(ArrayFloatNear(merged_expected))); + ElementsAreArray(ArrayFloatNear(merged_expected, + quantize_weights ? 1e-2 : 1e-5))); } TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClippingReverse) { @@ -698,7 +730,7 @@ TEST(LSTMOpTest, BlackBoxTestNoCifgNoPeepholeNoProjectionNoClippingReverse) { n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false, /*use_peephole=*/false, /*use_projection_weights=*/false, /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0, - /*proj_clip=*/0.0, + /*proj_clip=*/0.0, /*quantize_weights=*/false, { {sequence_length, n_batch, n_input}, // input tensor @@ -858,7 +890,7 @@ TEST(LSTMOpTest, BlackBoxTestWithCifgWithPeepholeNoProjectionNoClipping) { n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/false, /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0, - /*proj_clip=*/0.0, + /*proj_clip=*/0.0, /*quantize_weights=*/false, { {sequence_length, n_batch, n_input}, // input tensor @@ -1008,7 +1040,7 @@ TEST(LSTMOpTest, n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/true, /*use_peephole=*/true, /*use_projection_weights=*/false, 
/*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0, - /*proj_clip=*/0.0, + /*proj_clip=*/0.0, /*quantize_weights=*/false, { {sequence_length, n_batch, n_input}, // input tensor @@ -1158,7 +1190,7 @@ TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) { n_batch, n_input, n_cell, n_output, sequence_length, /*use_cifg=*/false, /*use_peephole=*/true, /*use_projection_weights=*/true, /*use_projection_bias=*/false, /*merge_outputs=*/false, /*cell_clip=*/0.0, - /*proj_clip=*/0.0, + /*proj_clip=*/0.0, /*quantize_weights=*/false, { {sequence_length, n_batch, n_input}, // input tensor @@ -1779,31 +1811,28 @@ TEST(LSTMOpTest, BlackBoxTestWithPeepholeWithProjectionNoClipping) { 0.0412031, 0.0118723, 0.0239643, 0.0394009}}; static float lstm_combined_golden_output[][64] = { - { - -0.022014, 0.073544, -0.002235, 0.040068, -0.037136, -0.052788, - 0.075325, -0.029378, 0.024298, -0.07733 , -0.030674, -0.060229, - 0.040599, 0.011608, 0.042005, 0.045977, -0.039225, 0.076294, - 0.000735, 0.032852, -0.069869, -0.053312, 0.073527, -0.028136, - 0.021585, -0.102679, -0.004327, -0.043304, 0.072861, 0.027077, - 0.034558, 0.068292, -0.036292, 0.069832, -0.003032, 0.053829, - -0.043821, -0.072713, 0.085029, -0.040374, 0.020014, -0.104521, - -0.034504, -0.059759, 0.062569, 0.025652, 0.049306, 0.061189, - -0.025146, 0.079643, -0.005188, 0.033080, -0.048079, -0.048082, - 0.069369, -0.028900, 0.024572, -0.077547, -0.022517, -0.054477, - 0.038857, 0.013336, 0.043234, 0.044788}, - { - -0.039186, 0.070792, -0.005913, 0.02642, -0.068274, -0.05022, - 0.061444, -0.031241, 0.014996, -0.094544, -0.004146, -0.03464, - 0.058981, 0.026097, 0.039781, 0.058408, -0.031887, 0.069252, - 0.00576, 0.054062, -0.042801, -0.059974, 0.085272, -0.034453, - 0.026097, -0.0959, -0.031164, -0.058699, 0.06839, 0.020512, - 0.044727, 0.063609, -0.039863, 0.084819, -0.003909, 0.028666, - -0.075677, -0.045125, 0.070379, -0.033895, 0.022111, -0.097184, - -0.004921, -0.040851, 0.062316, 
0.017435, 0.041437, 0.064568, - -0.039656, 0.060726, -0.003402, 0.036854, -0.056503, -0.058554, - 0.068588, -0.034879, 0.01352, -0.09962, -0.01434, -0.039505, - 0.065133, 0.024321, 0.038473, 0.062438 - }}; + {-0.022014, 0.073544, -0.002235, 0.040068, -0.037136, -0.052788, + 0.075325, -0.029378, 0.024298, -0.07733, -0.030674, -0.060229, + 0.040599, 0.011608, 0.042005, 0.045977, -0.039225, 0.076294, + 0.000735, 0.032852, -0.069869, -0.053312, 0.073527, -0.028136, + 0.021585, -0.102679, -0.004327, -0.043304, 0.072861, 0.027077, + 0.034558, 0.068292, -0.036292, 0.069832, -0.003032, 0.053829, + -0.043821, -0.072713, 0.085029, -0.040374, 0.020014, -0.104521, + -0.034504, -0.059759, 0.062569, 0.025652, 0.049306, 0.061189, + -0.025146, 0.079643, -0.005188, 0.033080, -0.048079, -0.048082, + 0.069369, -0.028900, 0.024572, -0.077547, -0.022517, -0.054477, + 0.038857, 0.013336, 0.043234, 0.044788}, + {-0.039186, 0.070792, -0.005913, 0.02642, -0.068274, -0.05022, + 0.061444, -0.031241, 0.014996, -0.094544, -0.004146, -0.03464, + 0.058981, 0.026097, 0.039781, 0.058408, -0.031887, 0.069252, + 0.00576, 0.054062, -0.042801, -0.059974, 0.085272, -0.034453, + 0.026097, -0.0959, -0.031164, -0.058699, 0.06839, 0.020512, + 0.044727, 0.063609, -0.039863, 0.084819, -0.003909, 0.028666, + -0.075677, -0.045125, 0.070379, -0.033895, 0.022111, -0.097184, + -0.004921, -0.040851, 0.062316, 0.017435, 0.041437, 0.064568, + -0.039656, 0.060726, -0.003402, 0.036854, -0.056503, -0.058554, + 0.068588, -0.034879, 0.01352, -0.09962, -0.01434, -0.039505, + 0.065133, 0.024321, 0.038473, 0.062438}}; for (int i = 0; i < lstm.sequence_length(); i++) { float* batch0_start = lstm_input[0] + i * lstm.num_inputs(); diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc index c22a457a71d928a139877c78cd39bd3c2ec7e283..6b702af47647c25f265048b6a0ab6a528dc89e84 100644 --- 
a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc +++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn.cc @@ -14,8 +14,8 @@ limitations under the License. ==============================================================================*/ #include #include -#include #include +#include #include #include @@ -114,8 +114,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TF_LITE_ENSURE_EQ(context, input->type, kTfLiteFloat32); TF_LITE_ENSURE_EQ(context, input->dims->size, 3); - const int batch_size = input->dims->data[0]; - const int max_time = input->dims->data[1]; + const bool time_major = params->time_major; + const int batch_size = + (time_major) ? input->dims->data[1] : input->dims->data[0]; + const int max_time = + (time_major) ? input->dims->data[0] : input->dims->data[1]; const int fw_num_units = fw_input_weights->dims->data[0]; const int bw_num_units = bw_input_weights->dims->data[0]; TF_LITE_ASSERT_EQ(input->dims->data[2], fw_input_weights->dims->data[1]); @@ -210,9 +213,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { GetTemporary(context, node, kScalingFactors); scaling_factors->type = kTfLiteFloat32; scaling_factors->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); - scaling_factors_size->data[0] = batch_size; - if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) { + int scaling_dims[1] = {batch_size}; + if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) { + TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); + scaling_factors_size->data[0] = batch_size; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors, scaling_factors_size)); } @@ -237,8 +241,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { // Resize outputs. 
TfLiteTensor* fw_output = GetOutput(context, node, kFwOutputTensor); TfLiteIntArray* fw_output_size_array = TfLiteIntArrayCreate(3); - fw_output_size_array->data[0] = batch_size; - fw_output_size_array->data[1] = max_time; + fw_output_size_array->data[0] = (time_major) ? max_time : batch_size; + fw_output_size_array->data[1] = (time_major) ? batch_size : max_time; fw_output_size_array->data[2] = params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units; TF_LITE_ENSURE_OK( @@ -266,8 +270,11 @@ TfLiteStatus EvalFloat( const TfLiteBidirectionalSequenceRNNParams* params, TfLiteTensor* fw_hidden_state, TfLiteTensor* fw_output, TfLiteTensor* bw_hidden_state, TfLiteTensor* bw_output) { - const int batch_size = input->dims->data[0]; - const int max_time = input->dims->data[1]; + const bool time_major = params->time_major; + const int batch_size = + (time_major) ? input->dims->data[1] : input->dims->data[0]; + const int max_time = + (time_major) ? input->dims->data[0] : input->dims->data[1]; const int input_size = input->dims->data[2]; const int aux_input_size = (aux_input) ? aux_input->dims->data[2] : 0; @@ -292,48 +299,92 @@ TfLiteStatus EvalFloat( params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units; const int bw_output_step = params->merge_outputs ? fw_num_units + bw_num_units : bw_num_units; - for (int b = 0; b < batch_size; b++) { + if (time_major) { // Forward cell. - float* fw_hidden_state_ptr_batch = - fw_hidden_state->data.f + b * fw_num_units; - float* fw_output_offset = fw_output->data.f + b * fw_output_step * max_time; + float* fw_hidden_state_ptr_batch = fw_hidden_state->data.f; for (int s = 0; s < max_time; s++) { const float* input_ptr_batch = - input->data.f + b * input_size * max_time + s * input_size; + input->data.f + s * input_size * batch_size; const float* aux_input_ptr_batch = (aux_input != nullptr) - ? aux_input->data.f + b * input_size * max_time + s * input_size + ? 
aux_input->data.f + s * input_size * batch_size : nullptr; - float* output_ptr_batch = fw_output_offset + s * fw_output_step; + float* output_ptr_batch = + fw_output->data.f + s * fw_output_step * batch_size; kernel_utils::RnnBatchStep( input_ptr_batch, fw_input_weights_ptr, aux_input_ptr_batch, fw_aux_input_weights_ptr, fw_recurrent_weights_ptr, fw_bias_ptr, - input_size, aux_input_size, fw_num_units, /*batch_size=*/1, + input_size, aux_input_size, fw_num_units, batch_size, fw_output_step, params->activation, fw_hidden_state_ptr_batch, output_ptr_batch); } // Backward cell. - float* bw_hidden_state_ptr_batch = - bw_hidden_state->data.f + b * bw_num_units; - float* bw_output_offset = - params->merge_outputs - ? fw_output->data.f + b * bw_output_step * max_time + fw_num_units - : bw_output->data.f + b * bw_output_step * max_time; + float* bw_hidden_state_ptr_batch = bw_hidden_state->data.f; for (int s = max_time - 1; s >= 0; s--) { const float* input_ptr_batch = - input->data.f + b * input_size * max_time + s * input_size; + input->data.f + s * input_size * batch_size; const float* aux_input_ptr_batch = (aux_input != nullptr) - ? aux_input->data.f + b * input_size * max_time + s * input_size + ? aux_input->data.f + s * input_size * batch_size : nullptr; - float* output_ptr_batch = bw_output_offset + s * bw_output_step; + float* output_ptr_batch = + (params->merge_outputs ? fw_output->data.f + fw_num_units + : bw_output->data.f) + + s * bw_output_step * batch_size; kernel_utils::RnnBatchStep( input_ptr_batch, bw_input_weights_ptr, aux_input_ptr_batch, bw_aux_input_weights_ptr, bw_recurrent_weights_ptr, bw_bias_ptr, - input_size, aux_input_size, bw_num_units, /*batch_size=*/1, + input_size, aux_input_size, bw_num_units, batch_size, bw_output_step, params->activation, bw_hidden_state_ptr_batch, output_ptr_batch); } + } else { + for (int b = 0; b < batch_size; b++) { + // Forward cell. 
+ float* fw_hidden_state_ptr_batch = + fw_hidden_state->data.f + b * fw_num_units; + float* fw_output_offset = + fw_output->data.f + b * fw_output_step * max_time; + for (int s = 0; s < max_time; s++) { + const float* input_ptr_batch = + input->data.f + b * input_size * max_time + s * input_size; + const float* aux_input_ptr_batch = + (aux_input != nullptr) + ? aux_input->data.f + b * input_size * max_time + s * input_size + : nullptr; + float* output_ptr_batch = fw_output_offset + s * fw_output_step; + + kernel_utils::RnnBatchStep( + input_ptr_batch, fw_input_weights_ptr, aux_input_ptr_batch, + fw_aux_input_weights_ptr, fw_recurrent_weights_ptr, fw_bias_ptr, + input_size, aux_input_size, fw_num_units, /*batch_size=*/1, + fw_output_step, params->activation, fw_hidden_state_ptr_batch, + output_ptr_batch); + } + // Backward cell. + float* bw_hidden_state_ptr_batch = + bw_hidden_state->data.f + b * bw_num_units; + float* bw_output_offset = + params->merge_outputs + ? fw_output->data.f + b * bw_output_step * max_time + fw_num_units + : bw_output->data.f + b * bw_output_step * max_time; + for (int s = max_time - 1; s >= 0; s--) { + const float* input_ptr_batch = + input->data.f + b * input_size * max_time + s * input_size; + const float* aux_input_ptr_batch = + (aux_input != nullptr) + ? 
aux_input->data.f + b * input_size * max_time + s * input_size + : nullptr; + float* output_ptr_batch = bw_output_offset + s * bw_output_step; + + kernel_utils::RnnBatchStep( + input_ptr_batch, bw_input_weights_ptr, aux_input_ptr_batch, + bw_aux_input_weights_ptr, bw_recurrent_weights_ptr, bw_bias_ptr, + input_size, aux_input_size, bw_num_units, /*batch_size=*/1, + bw_output_step, params->activation, bw_hidden_state_ptr_batch, + output_ptr_batch); + } + } } return kTfLiteOk; } @@ -351,8 +402,11 @@ TfLiteStatus EvalHybrid( TfLiteTensor* fw_hidden_state, TfLiteTensor* fw_output, TfLiteTensor* bw_hidden_state_quantized, TfLiteTensor* bw_hidden_state, TfLiteTensor* bw_output) { - const int batch_size = input->dims->data[0]; - const int max_time = input->dims->data[1]; + const bool time_major = params->time_major; + const int batch_size = + (time_major) ? input->dims->data[1] : input->dims->data[0]; + const int max_time = + (time_major) ? input->dims->data[0] : input->dims->data[1]; const int input_size = input->dims->data[2]; const int aux_input_size = (aux_input) ? aux_input->dims->data[2] : 0; @@ -403,55 +457,107 @@ TfLiteStatus EvalHybrid( params->merge_outputs ? fw_num_units + bw_num_units : fw_num_units; const int bw_output_step = params->merge_outputs ? fw_num_units + bw_num_units : bw_num_units; - for (int b = 0; b < batch_size; b++) { - // Forward cell. - float* fw_hidden_state_ptr_batch = - fw_hidden_state->data.f + b * fw_num_units; - float* fw_output_offset = fw_output->data.f + b * fw_output_step * max_time; - for (int s = 0; s < max_time; s++) { - const float* input_ptr_batch = - input->data.f + b * input_size * max_time + s * input_size; - const float* aux_input_ptr_batch = - (aux_input != nullptr) - ? 
aux_input->data.f + b * input_size * max_time + s * input_size - : nullptr; - float* output_ptr_batch = fw_output_offset + s * fw_output_step; - - kernel_utils::RnnBatchStep( - input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale, - aux_input_ptr_batch, aux_fw_input_weights_ptr, - aux_fw_input_weights_scale, fw_recurrent_weights_ptr, - fw_recurrent_weights_scale, fw_bias_ptr, input_size, aux_input_size, - fw_num_units, /*batch_size=*/1, params->activation, - quantized_input_ptr, aux_quantized_input_ptr, - fw_quantized_hidden_state_ptr, scaling_factors_ptr, - fw_hidden_state_ptr_batch, output_ptr_batch); + if (time_major) { + for (int t = 0; t < max_time; t++) { + // Forward cell. + float* fw_hidden_state_ptr_batch = fw_hidden_state->data.f; + for (int s = 0; s < max_time; s++) { + const float* input_ptr_batch = + input->data.f + s * input_size * batch_size; + const float* aux_input_ptr_batch = + (aux_input != nullptr) + ? aux_input->data.f + s * input_size * batch_size + : nullptr; + float* output_ptr_batch = + fw_output->data.f + s * fw_output_step * batch_size; + + kernel_utils::RnnBatchStep( + input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale, + aux_input_ptr_batch, aux_fw_input_weights_ptr, + aux_fw_input_weights_scale, fw_recurrent_weights_ptr, + fw_recurrent_weights_scale, fw_bias_ptr, input_size, aux_input_size, + fw_num_units, batch_size, fw_output_step, params->activation, + quantized_input_ptr, aux_quantized_input_ptr, + fw_quantized_hidden_state_ptr, scaling_factors_ptr, + fw_hidden_state_ptr_batch, output_ptr_batch); + } + // Backward cell. + float* bw_hidden_state_ptr_batch = bw_hidden_state->data.f; + for (int s = max_time - 1; s >= 0; s--) { + const float* input_ptr_batch = + input->data.f + s * input_size * batch_size; + const float* aux_input_ptr_batch = + (aux_input != nullptr) + ? aux_input->data.f + s * input_size * batch_size + : nullptr; + float* output_ptr_batch = + (params->merge_outputs ? 
fw_output->data.f + fw_num_units + : bw_output->data.f) + + s * bw_output_step * batch_size; + + kernel_utils::RnnBatchStep( + input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale, + aux_input_ptr_batch, aux_bw_input_weights_ptr, + aux_bw_input_weights_scale, bw_recurrent_weights_ptr, + bw_recurrent_weights_scale, bw_bias_ptr, input_size, aux_input_size, + bw_num_units, batch_size, bw_output_step, params->activation, + quantized_input_ptr, aux_quantized_input_ptr, + bw_quantized_hidden_state_ptr, scaling_factors_ptr, + bw_hidden_state_ptr_batch, output_ptr_batch); + } } - // Backward cell. - float* bw_hidden_state_ptr_batch = - bw_hidden_state->data.f + b * bw_num_units; - float* bw_output_offset = - params->merge_outputs - ? fw_output->data.f + b * bw_output_step * max_time - : bw_output->data.f + b * bw_output_step * max_time; - for (int s = max_time - 1; s >= 0; s--) { - const float* input_ptr_batch = - input->data.f + b * input_size * max_time + s * input_size; - const float* aux_input_ptr_batch = - (aux_input != nullptr) - ? aux_input->data.f + b * input_size * max_time + s * input_size - : nullptr; - float* output_ptr_batch = bw_output_offset + s * bw_output_step; - - kernel_utils::RnnBatchStep( - input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale, - aux_input_ptr_batch, aux_bw_input_weights_ptr, - aux_bw_input_weights_scale, bw_recurrent_weights_ptr, - bw_recurrent_weights_scale, bw_bias_ptr, input_size, aux_input_size, - bw_num_units, /*batch_size=*/1, params->activation, - quantized_input_ptr, aux_quantized_input_ptr, - bw_quantized_hidden_state_ptr, scaling_factors_ptr, - bw_hidden_state_ptr_batch, output_ptr_batch); + } else { + for (int b = 0; b < batch_size; b++) { + // Forward cell. 
+ float* fw_hidden_state_ptr_batch = + fw_hidden_state->data.f + b * fw_num_units; + float* fw_output_offset = + fw_output->data.f + b * fw_output_step * max_time; + for (int s = 0; s < max_time; s++) { + const float* input_ptr_batch = + input->data.f + b * input_size * max_time + s * input_size; + const float* aux_input_ptr_batch = + (aux_input != nullptr) + ? aux_input->data.f + b * input_size * max_time + s * input_size + : nullptr; + float* output_ptr_batch = fw_output_offset + s * fw_output_step; + + kernel_utils::RnnBatchStep( + input_ptr_batch, fw_input_weights_ptr, fw_input_weights_scale, + aux_input_ptr_batch, aux_fw_input_weights_ptr, + aux_fw_input_weights_scale, fw_recurrent_weights_ptr, + fw_recurrent_weights_scale, fw_bias_ptr, input_size, aux_input_size, + fw_num_units, /*batch_size=*/1, fw_output_step, params->activation, + quantized_input_ptr, aux_quantized_input_ptr, + fw_quantized_hidden_state_ptr, scaling_factors_ptr, + fw_hidden_state_ptr_batch, output_ptr_batch); + } + // Backward cell. + float* bw_hidden_state_ptr_batch = + bw_hidden_state->data.f + b * bw_num_units; + float* bw_output_offset = + params->merge_outputs + ? fw_output->data.f + b * bw_output_step * max_time + fw_num_units + : bw_output->data.f + b * bw_output_step * max_time; + for (int s = max_time - 1; s >= 0; s--) { + const float* input_ptr_batch = + input->data.f + b * input_size * max_time + s * input_size; + const float* aux_input_ptr_batch = + (aux_input != nullptr) + ? 
aux_input->data.f + b * input_size * max_time + s * input_size + : nullptr; + float* output_ptr_batch = bw_output_offset + s * bw_output_step; + + kernel_utils::RnnBatchStep( + input_ptr_batch, bw_input_weights_ptr, bw_input_weights_scale, + aux_input_ptr_batch, aux_bw_input_weights_ptr, + aux_bw_input_weights_scale, bw_recurrent_weights_ptr, + bw_recurrent_weights_scale, bw_bias_ptr, input_size, aux_input_size, + bw_num_units, /*batch_size=*/1, bw_output_step, params->activation, + quantized_input_ptr, aux_quantized_input_ptr, + bw_quantized_hidden_state_ptr, scaling_factors_ptr, + bw_hidden_state_ptr_batch, output_ptr_batch); + } } } return kTfLiteOk; diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc index f555c472f51b7c4d301c855c4edf83e219fea445..d0d04428c9594dab8d86342223bac15c0eefe298 100644 --- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc +++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_rnn_test.cc @@ -14,8 +14,8 @@ limitations under the License. ==============================================================================*/ // Unit test for TFLite Bidirectional RNN op. 
-#include #include +#include #include #include @@ -654,7 +654,8 @@ const std::initializer_list recurrent_weights = { class BidirectionalRNNOpModel : public SingleOpModel { public: BidirectionalRNNOpModel(int batches, int sequence_len, int fw_units, - int bw_units, int input_size, bool merge_outputs) + int bw_units, int input_size, bool time_major, + bool merge_outputs) : batches_(batches), sequence_len_(sequence_len), fw_units_(fw_units), @@ -679,25 +680,29 @@ class BidirectionalRNNOpModel : public SingleOpModel { bw_output_ = AddOutput(TensorType_FLOAT32); } - SetBuiltinOp(BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, - BuiltinOptions_BidirectionalSequenceRNNOptions, - CreateBidirectionalSequenceRNNOptions( - builder_, /*time_major=*/false, - ActivationFunctionType_RELU, merge_outputs) - .Union()); + SetBuiltinOp( + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN, + BuiltinOptions_BidirectionalSequenceRNNOptions, + CreateBidirectionalSequenceRNNOptions( + builder_, time_major, ActivationFunctionType_RELU, merge_outputs) + .Union()); + const auto input_shape = + (time_major) ? 
std::vector({sequence_len_, batches_, input_size_}) + : std::vector({batches_, sequence_len_, input_size_}); + BuildInterpreter({ - {batches_, sequence_len_, input_size_}, // input - {fw_units_, input_size_}, // fw_weights - {fw_units_, fw_units_}, // fw_recurrent_weights - {fw_units_}, // fw_bias - {batches_, fw_units_}, // fw_hidden_state - {bw_units_, input_size_}, // bw_weights - {bw_units_, bw_units_}, // bw_recurrent_weights - {bw_units_}, // bw_bias - {batches_, bw_units_}, // bw_hidden_state - {batches_, sequence_len_, 0}, // aux_input - {fw_units_, 0}, // aux_fw_weights - {bw_units_, 0}, // aux_bw_weights + input_shape, // input + {fw_units_, input_size_}, // fw_weights + {fw_units_, fw_units_}, // fw_recurrent_weights + {fw_units_}, // fw_bias + {batches_, fw_units_}, // fw_hidden_state + {bw_units_, input_size_}, // bw_weights + {bw_units_, bw_units_}, // bw_recurrent_weights + {bw_units_}, // bw_bias + {batches_, bw_units_}, // bw_hidden_state + {batches_, sequence_len_, 0}, // aux_input + {fw_units_, 0}, // aux_fw_weights + {bw_units_, 0}, // aux_bw_weights }); } @@ -770,7 +775,8 @@ class BidirectionalRNNOpModel : public SingleOpModel { TEST(BidirectionalRNNOpTest, BlackBoxTest) { BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16, /*fw_units=*/16, /*bw_units=*/16, - /*input_size=*/8, /*merge_outputs=*/false); + /*input_size=*/8, /*time_major=*/false, + /*merge_outputs=*/false); rnn.SetFwWeights(weights); rnn.SetBwWeights(weights); rnn.SetFwBias(biases); @@ -803,11 +809,49 @@ TEST(BidirectionalRNNOpTest, BlackBoxTest) { EXPECT_THAT(rnn.GetBwOutput(), ElementsAreArray(ArrayFloatNear(bw_expected))); } -// Same as the previous test, yet with merged outputs. +// Same as BlackBox test, but input is reshuffled to time_major format. 
+TEST(BidirectionalRNNOpTest, BlackBoxTestTimeMajor) { + BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16, + /*fw_units=*/16, /*bw_units=*/16, + /*input_size=*/8, /*time_major=*/true, + /*merge_outputs=*/false); + rnn.SetFwWeights(weights); + rnn.SetBwWeights(weights); + rnn.SetFwBias(biases); + rnn.SetBwBias(biases); + rnn.SetFwRecurrentWeights(recurrent_weights); + rnn.SetBwRecurrentWeights(recurrent_weights); + + // const int input_sequence_size = rnn.input_size() * rnn.sequence_len(); + // Insert the inputs in time_major format. The batch_major format is: + // [b0t0, b0t1, ..., b0t15, b1t0, b1t1, ..., b1t15]. This is reshuffled as: + // [b0t0, b1t0, b0t1, b1t1, ..., b0t15, b1t15]. + for (int i = 0; i < rnn.sequence_len(); i++) { + float* batch_start = rnn_input + i * rnn.input_size(); + float* batch_end = batch_start + rnn.input_size(); + // The two batches are identical. + rnn.SetInput(2 * i * rnn.input_size(), batch_start, batch_end); + rnn.SetInput((2 * i + 1) * rnn.input_size(), batch_start, batch_end); + } + + rnn.Invoke(); + + std::vector fw_expected; + for (int i = 0; i < rnn.sequence_len(); i++) { + float* golden_fw_start = rnn_golden_fw_output + i * rnn.num_fw_units(); + float* golden_fw_end = golden_fw_start + rnn.num_fw_units(); + fw_expected.insert(fw_expected.end(), golden_fw_start, golden_fw_end); + fw_expected.insert(fw_expected.end(), golden_fw_start, golden_fw_end); + } + EXPECT_THAT(rnn.GetFwOutput(), ElementsAreArray(ArrayFloatNear(fw_expected))); +} + +// Same as BlackBox test, yet with merged outputs. 
TEST(BidirectionalRNNOpTest, BlackBoxTestMergeOutputs) { BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16, /*fw_units=*/16, /*bw_units=*/16, - /*input_size=*/8, /*merge_outputs=*/true); + /*input_size=*/8, /*time_major=*/false, + /*merge_outputs=*/true); rnn.SetFwWeights(weights); rnn.SetBwWeights(weights); rnn.SetFwBias(biases); @@ -840,12 +884,56 @@ TEST(BidirectionalRNNOpTest, BlackBoxTestMergeOutputs) { ElementsAreArray(ArrayFloatNear(merged_expected))); } +// Same as BlackBox test, but input is reshuffled to time_major format. +TEST(BidirectionalRNNOpTest, BlackBoxTestTimeMajorMergeOutputs) { + BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16, + /*fw_units=*/16, /*bw_units=*/16, + /*input_size=*/8, /*time_major=*/true, + /*merge_outputs=*/true); + rnn.SetFwWeights(weights); + rnn.SetBwWeights(weights); + rnn.SetFwBias(biases); + rnn.SetBwBias(biases); + rnn.SetFwRecurrentWeights(recurrent_weights); + rnn.SetBwRecurrentWeights(recurrent_weights); + + // Insert the inputs in time_major format. The batch_major format is: + // [b0t0, b0t1, ..., b0t15, b1t0, b1t1, ..., b1t15]. This is reshuffled as: + // [b0t0, b1t0, b0t1, b1t1, ..., b0t15, b1t15]. + for (int i = 0; i < rnn.sequence_len(); i++) { + float* batch_start = rnn_input + i * rnn.input_size(); + float* batch_end = batch_start + rnn.input_size(); + // The two batches are identical. 
+ rnn.SetInput(2 * i * rnn.input_size(), batch_start, batch_end); + rnn.SetInput((2 * i + 1) * rnn.input_size(), batch_start, batch_end); + } + + rnn.Invoke(); + + std::vector merged_expected; + for (int step = 0; step < rnn.sequence_len(); step++) { + for (int bid = 0; bid < rnn.num_batches(); bid++) { + merged_expected.insert( + merged_expected.end(), + rnn_golden_fw_output + rnn.num_fw_units() * step, + rnn_golden_fw_output + rnn.num_fw_units() * (step + 1)); + merged_expected.insert( + merged_expected.end(), + rnn_golden_bw_output + rnn.num_bw_units() * step, + rnn_golden_bw_output + rnn.num_bw_units() * (step + 1)); + } + } + EXPECT_THAT(rnn.GetFwOutput(), + ElementsAreArray(ArrayFloatNear(merged_expected))); +} + // Check that if the input sequence is reversed the outputs are the same just // forward and backward are swapped (and reversed). TEST(BidirectionalRNNOpTest, BlackBoxTestReverseInputs) { BidirectionalRNNOpModel rnn(/*batches=*/2, /*sequence_len=*/16, /*fw_units=*/16, /*bw_units=*/16, - /*input_size=*/8, /*merge_outputs=*/false); + /*input_size=*/8, /*time_major=*/false, + /*merge_outputs=*/false); rnn.SetFwWeights(weights); rnn.SetBwWeights(weights); rnn.SetFwBias(biases); @@ -891,7 +979,8 @@ TEST(BidirectionalRNNOpTest, BlackBoxTestReverseInputs) { TEST(BidirectionalRNNOpTest, EndToEndTest) { BidirectionalRNNOpModel rnn(/*batches=*/1, /*sequence_len=*/4, /*fw_units=*/16, /*bw_units=*/16, - /*input_size=*/8, /*merge_outputs=*/false); + /*input_size=*/8, /*time_major=*/false, + /*merge_outputs=*/false); const int output_size = 4; float dnn_weights[] = { -0.5782342, -0.052212059, 0.73036242, -0.81216097, -0.80088139, @@ -908,8 +997,8 @@ TEST(BidirectionalRNNOpTest, EndToEndTest) { 0.3492105, 0.56452453, 0.4389236, -0.59929526, -0.19762468, -0.36868393, -0.13198286, -0.53800809, -0.22850353}; - std::initializer_list dnn_biases = { - 0.29177809, -0.98799044, 0.065919638, 0.68781924}; + std::initializer_list dnn_biases = {0.29177809, -0.98799044, + 
0.065919638, 0.68781924}; rnn.SetFwWeights(weights); rnn.SetBwWeights(weights); diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc index dbcadbee14ceb040e087e221bb1cc4e0a40a47df..6695282a924b130366c2793b8e04c1bc4f8fe5a4 100644 --- a/tensorflow/contrib/lite/kernels/conv.cc +++ b/tensorflow/contrib/lite/kernels/conv.cc @@ -387,12 +387,14 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { GetTemporary(context, node, data->scaling_factors_index); scaling_factors->type = kTfLiteFloat32; scaling_factors->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); // Only one scale factor per batch is typically necessary. See optimized // implementation for why we need to allocate for the height of the inputs // flattened to 2D. - scaling_factors_size->data[0] = NumElements(input) / channels_in; - if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) { + const int height = NumElements(input) / channels_in; + int scaling_dims[1] = {height}; + if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) { + TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); + scaling_factors_size->data[0] = height; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors, scaling_factors_size)); } diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup.cc b/tensorflow/contrib/lite/kernels/embedding_lookup.cc index fe33f98eb0d78d988af4ded79065621b53b659f3..1d0c71ad48e36cf5df71f7f8909adaf97e9db989 100644 --- a/tensorflow/contrib/lite/kernels/embedding_lookup.cc +++ b/tensorflow/contrib/lite/kernels/embedding_lookup.cc @@ -78,7 +78,10 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node, for (int i = 0; i < SizeOfDimension(lookup, 0); i++) { int idx = lookup->data.i32[i]; if (idx >= row_size || idx < 0) { - context->ReportError(context, "Embedding Lookup: index out of bounds."); + context->ReportError(context, + 
"Embedding Lookup: index out of bounds. " + "Got %d, and bounds are [0, %d]", + idx, row_size - 1); return kTfLiteError; } else { memcpy(output->data.raw + i * row_bytes, @@ -104,7 +107,10 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node, for (int i = 0; i < SizeOfDimension(lookup, 0); i++) { int idx = lookup->data.i32[i]; if (idx >= row_size || idx < 0) { - context->ReportError(context, "Embedding Lookup: index out of bounds."); + context->ReportError(context, + "Embedding Lookup: index out of bounds. " + "Got %d, and bounds are [0, %d]", + idx, row_size - 1); return kTfLiteError; } else { // Dequantize embedding values. diff --git a/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc index aa75b03990208f9ee61bb2ef3bfc9cbefcfbaf2b..0b076941ea2164bc41797f123bf71425f8b1d583 100644 --- a/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc +++ b/tensorflow/contrib/lite/kernels/embedding_lookup_sparse.cc @@ -188,7 +188,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { int idx = ids->data.i32[i]; if (idx >= num_rows || idx < 0) { context->ReportError(context, - "Embedding Lookup Sparse: index out of bounds."); + "Embedding Lookup Sparse: index out of bounds. 
" + "Got %d, and bounds are [0, %d]", + idx, num_rows - 1); return kTfLiteError; } diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index afb5ec05df4429bc89acfd81b71b9c081f90dfc9..5c9ca6e910ac9c0f0814b99b7d12206405bf1f28 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -49,6 +49,20 @@ cc_library( ], ) +cc_library( + name = "legacy_types", + srcs = [], + hdrs = [ + "compatibility.h", + "legacy_types.h", + "types.h", + ], + deps = [ + "//tensorflow/contrib/lite/kernels:op_macros", + "@com_google_absl//absl/base:core_headers", + ], +) + config_setting( name = "arm", values = { @@ -198,6 +212,7 @@ cc_library( ":strided_slice_logic", ":tensor_utils", ":types", + ":legacy_types", ":legacy_reference_base", ":round", "//third_party/eigen3", @@ -336,6 +351,7 @@ cc_library( ":quantization_util", ":round", ":strided_slice_logic", + ":legacy_types", ":types", "@gemmlowp", "//tensorflow/contrib/lite/c:c_api_internal", diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc index 083e5839bd5d3c07c0b92d12392f03a9abed0f03..7875b23979e33c2e7b5268c8d525dd4f46442b28 100644 --- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc @@ -22,14 +22,15 @@ namespace kernel_utils { void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr, const float* recurrent_weights_ptr, const float* bias_ptr, int input_size, int num_units, int batch_size, + int output_batch_leading_dim, TfLiteFusedActivation activation, float* hidden_state_ptr_batch, float* output_ptr_batch) { RnnBatchStep(input_ptr_batch, input_weights_ptr, /*aux_input_ptr_batch=*/nullptr, /*aux_input_weights_ptr=*/nullptr, recurrent_weights_ptr, bias_ptr, input_size, /*aux_input_size=*/0, num_units, - batch_size, activation, hidden_state_ptr_batch, - 
output_ptr_batch); + batch_size, output_batch_leading_dim, activation, + hidden_state_ptr_batch, output_ptr_batch); } void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr, @@ -37,49 +38,100 @@ void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr, const float* aux_input_weights_ptr, const float* recurrent_weights_ptr, const float* bias_ptr, int input_size, int aux_input_size, int num_units, - int batch_size, TfLiteFusedActivation activation, + int batch_size, int output_batch_leading_dim, + TfLiteFusedActivation activation, float* hidden_state_ptr_batch, float* output_ptr_batch) { - // Output = bias - tensor_utils::VectorBatchVectorAssign(bias_ptr, num_units, batch_size, - output_ptr_batch); - // Output += input * input_weights - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_weights_ptr, num_units, input_size, input_ptr_batch, batch_size, - output_ptr_batch, /*result_stride=*/1); - // Output += aux_input * aux_input_weights (if they are not empty). - if (aux_input_size > 0) { + // Since the output batch rows may not be contiguous (output_batch_leading_dim + // != num_units), we unroll the batched operations where this is the case. + if (output_batch_leading_dim == num_units) { + // Output = bias + tensor_utils::VectorBatchVectorAssign(bias_ptr, num_units, batch_size, + output_ptr_batch); + + // Output += input * input_weights tensor_utils::MatrixBatchVectorMultiplyAccumulate( - aux_input_weights_ptr, num_units, aux_input_size, aux_input_ptr_batch, + input_weights_ptr, num_units, input_size, input_ptr_batch, batch_size, + output_ptr_batch, /*result_stride=*/1); + + // Output += aux_input * aux_input_weights (if they are not empty).
+ if (aux_input_size > 0) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + aux_input_weights_ptr, num_units, aux_input_size, aux_input_ptr_batch, + batch_size, output_ptr_batch, /*result_stride=*/1); + } + + // Output += recurrent_weights * hidden_state + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_weights_ptr, num_units, num_units, hidden_state_ptr_batch, batch_size, output_ptr_batch, /*result_stride=*/1); + + // Output = activation(Output) and update hidden_state + tensor_utils::ApplyActivationToVector( + output_ptr_batch, num_units * batch_size, activation, output_ptr_batch); + tensor_utils::CopyVector(output_ptr_batch, num_units * batch_size, + hidden_state_ptr_batch); + } else { + // Output = bias + for (int k = 0; k < batch_size; k++) { + tensor_utils::CopyVector(bias_ptr, num_units, + output_ptr_batch + k * output_batch_leading_dim); + } + + // Output += input * input_weights + for (int k = 0; k < batch_size; k++) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_weights_ptr, num_units, input_size, + input_ptr_batch + k * input_size, /*n_batch=*/1, + output_ptr_batch + k * output_batch_leading_dim, /*result_stride=*/1); + } + + // Output += aux_input * aux_input_weights (if they are not empty). 
+ if (aux_input_size > 0) { + for (int k = 0; k < batch_size; k++) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + aux_input_weights_ptr, num_units, aux_input_size, + aux_input_ptr_batch + k * aux_input_size, + /*n_batch=*/1, output_ptr_batch + k * output_batch_leading_dim, + /*result_stride=*/1); + } + } + + // Output += recurrent_weights * hidden_state + for (int k = 0; k < batch_size; k++) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_weights_ptr, num_units, num_units, + hidden_state_ptr_batch + k * num_units, + /*n_batch=*/1, output_ptr_batch + k * output_batch_leading_dim, + /*result_stride=*/1); + } + + // Output = activation(Output) and update hidden_state + for (int k = 0; k < batch_size; k++) { + tensor_utils::ApplyActivationToVector( + output_ptr_batch + k * output_batch_leading_dim, num_units, + activation, output_ptr_batch + k * output_batch_leading_dim); + tensor_utils::CopyVector(output_ptr_batch + k * output_batch_leading_dim, + num_units, + hidden_state_ptr_batch + k * num_units); + } } - // Output += recurrent_weights * hidden_state - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_weights_ptr, num_units, num_units, hidden_state_ptr_batch, - batch_size, output_ptr_batch, /*result_stride=*/1); - // Output = activation(Output) and update hidden_state - tensor_utils::ApplyActivationToVector( - output_ptr_batch, num_units * batch_size, activation, output_ptr_batch); - tensor_utils::VectorBatchVectorAssign(output_ptr_batch, num_units, batch_size, - hidden_state_ptr_batch); } -void RnnBatchStep(const float* input_ptr_batch, const int8_t* input_weights_ptr, - float input_weights_scale, - const int8_t* recurrent_weights_ptr, - float recurrent_weights_scale, const float* bias_ptr, - int input_size, int num_units, int batch_size, - TfLiteFusedActivation activation, - int8_t* quantized_input_ptr_batch, - int8_t* quantized_hidden_state_ptr_batch, - float* scaling_factors, float* hidden_state_ptr_batch, - float* 
output_ptr_batch) { +void RnnBatchStep( + const float* input_ptr_batch, const int8_t* input_weights_ptr, + float input_weights_scale, const int8_t* recurrent_weights_ptr, + float recurrent_weights_scale, const float* bias_ptr, int input_size, + int num_units, int batch_size, int output_batch_leading_dim, + TfLiteFusedActivation activation, int8_t* quantized_input_ptr_batch, + int8_t* quantized_hidden_state_ptr_batch, float* scaling_factors, + float* hidden_state_ptr_batch, float* output_ptr_batch) { RnnBatchStep(input_ptr_batch, input_weights_ptr, input_weights_scale, /*aux_input_ptr_batch=*/nullptr, /*aux_input_weights_ptr=*/nullptr, /*aux_input_weights_scale=*/0.0f, recurrent_weights_ptr, recurrent_weights_scale, bias_ptr, input_size, - /*aux_input_size=*/0, num_units, batch_size, activation, - quantized_input_ptr_batch, + /*aux_input_size=*/0, num_units, batch_size, + output_batch_leading_dim, activation, quantized_input_ptr_batch, /*aux_quantized_input_ptr_batch=*/nullptr, quantized_hidden_state_ptr_batch, scaling_factors, hidden_state_ptr_batch, output_ptr_batch); @@ -91,82 +143,178 @@ void RnnBatchStep( const int8_t* aux_input_weights_ptr, float aux_input_weights_scale, const int8_t* recurrent_weights_ptr, float recurrent_weights_scale, const float* bias_ptr, int input_size, int aux_input_size, int num_units, - int batch_size, TfLiteFusedActivation activation, - int8_t* quantized_input_ptr_batch, int8_t* aux_quantized_input_ptr_batch, + int batch_size, int output_batch_leading_dim, + TfLiteFusedActivation activation, int8_t* quantized_input_ptr_batch, + int8_t* aux_quantized_input_ptr_batch, int8_t* quantized_hidden_state_ptr_batch, float* scaling_factors, float* hidden_state_ptr_batch, float* output_ptr_batch) { - // Output = bias - tensor_utils::VectorBatchVectorAssign(bias_ptr, num_units, batch_size, - output_ptr_batch); - - // Save quantization and matmul computation for all zero input.
- if (!tensor_utils::IsZeroVector(input_ptr_batch, batch_size * input_size)) { - // Quantize input from float to uint8 + quantization params (scaling - // factor). - float unused_min, unused_max; - // TODO(mirkov,raziel): replace this for-loop with a MACRO (or function) - // whichever is faster. - for (int b = 0; b < batch_size; ++b) { - const int offset = b * input_size; - tensor_utils::SymmetricQuantizeFloats( - input_ptr_batch + offset, input_size, - quantized_input_ptr_batch + offset, &unused_min, &unused_max, - &scaling_factors[b]); - scaling_factors[b] *= input_weights_scale; + // Since the output batch rows may not be contiguous (output_batch_leading_dim + // != n_output), we unroll the batched operations where this is the case. + if (output_batch_leading_dim == num_units) { + // Output = bias + tensor_utils::VectorBatchVectorAssign(bias_ptr, num_units, batch_size, + output_ptr_batch); + + // Save quantization and matmul computation for all zero input. + if (!tensor_utils::IsZeroVector(input_ptr_batch, batch_size * input_size)) { + // Quantize input from float to uint8 + quantization params (scaling + // factor). + float unused_min, unused_max; + // TODO(mirkov,raziel): replace this for-loop with a MACRO (or function) + // whichever is faster. 
+ for (int b = 0; b < batch_size; ++b) { + const int offset = b * input_size; + tensor_utils::SymmetricQuantizeFloats( + input_ptr_batch + offset, input_size, + quantized_input_ptr_batch + offset, &unused_min, &unused_max, + &scaling_factors[b]); + scaling_factors[b] *= input_weights_scale; + } + + // Output += input * input_weights + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_weights_ptr, num_units, input_size, quantized_input_ptr_batch, + scaling_factors, batch_size, output_ptr_batch, /*result_stride=*/1); } - // Output += input * input_weights - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_weights_ptr, num_units, input_size, quantized_input_ptr_batch, - scaling_factors, batch_size, output_ptr_batch, /*result_stride=*/1); - } + if (aux_input_ptr_batch && + !tensor_utils::IsZeroVector(aux_input_ptr_batch, + batch_size * aux_input_size)) { + float unused_min, unused_max; + for (int b = 0; b < batch_size; ++b) { + const int offset = b * aux_input_size; + tensor_utils::SymmetricQuantizeFloats( + aux_input_ptr_batch + offset, aux_input_size, + aux_quantized_input_ptr_batch + offset, &unused_min, &unused_max, + &scaling_factors[b]); + scaling_factors[b] *= aux_input_weights_scale; + } - if (aux_input_ptr_batch && - !tensor_utils::IsZeroVector(aux_input_ptr_batch, - batch_size * aux_input_size)) { - float unused_min, unused_max; - for (int b = 0; b < batch_size; ++b) { - const int offset = b * aux_input_size; - tensor_utils::SymmetricQuantizeFloats( - aux_input_ptr_batch + offset, aux_input_size, - aux_quantized_input_ptr_batch + offset, &unused_min, &unused_max, - &scaling_factors[b]); - scaling_factors[b] *= aux_input_weights_scale; + // Output += aux_input * aux_input_weights + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + aux_input_weights_ptr, num_units, aux_input_size, + aux_quantized_input_ptr_batch, scaling_factors, batch_size, + output_ptr_batch, /*result_stride=*/1); } - // Output += aux_input * aux_input_weights - 
tensor_utils::MatrixBatchVectorMultiplyAccumulate( - aux_input_weights_ptr, num_units, aux_input_size, - aux_quantized_input_ptr_batch, scaling_factors, batch_size, - output_ptr_batch, /*result_stride=*/1); - } + // Save quantization and matmul computation for all zero input. + if (!tensor_utils::IsZeroVector(hidden_state_ptr_batch, + batch_size * num_units)) { + // Quantize hidden_state + float unused_min, unused_max; + for (int b = 0; b < batch_size; ++b) { + const int offset = b * num_units; + tensor_utils::SymmetricQuantizeFloats( + hidden_state_ptr_batch + offset, num_units, + quantized_hidden_state_ptr_batch + offset, &unused_min, &unused_max, + &scaling_factors[b]); + scaling_factors[b] *= recurrent_weights_scale; + } - // Save quantization and matmul computation for all zero input. - if (!tensor_utils::IsZeroVector(hidden_state_ptr_batch, - batch_size * num_units)) { - // Quantize hidden_state - float unused_min, unused_max; - for (int b = 0; b < batch_size; ++b) { - const int offset = b * num_units; - tensor_utils::SymmetricQuantizeFloats( - hidden_state_ptr_batch + offset, num_units, - quantized_hidden_state_ptr_batch + offset, &unused_min, &unused_max, - &scaling_factors[b]); - scaling_factors[b] *= recurrent_weights_scale; + // Output += recurrent_weights * hidden_state + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_weights_ptr, num_units, num_units, + quantized_hidden_state_ptr_batch, scaling_factors, batch_size, + output_ptr_batch, /*result_stride=*/1); } - // Output += recurrent_weights * hidden_state - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_weights_ptr, num_units, num_units, - quantized_hidden_state_ptr_batch, scaling_factors, batch_size, - output_ptr_batch, /*result_stride=*/1); - } + // Output = activation(Output) and update hidden_state + tensor_utils::ApplyActivationToVector( + output_ptr_batch, num_units * batch_size, activation, output_ptr_batch); + tensor_utils::CopyVector(output_ptr_batch, num_units 
* batch_size, + hidden_state_ptr_batch); + } else { + // Output = bias + for (int k = 0; k < batch_size; k++) { + tensor_utils::CopyVector(bias_ptr, num_units, + output_ptr_batch + k * output_batch_leading_dim); + } - // Output = activation(Output) and update hidden_state - tensor_utils::ApplyActivationToVector( - output_ptr_batch, num_units * batch_size, activation, output_ptr_batch); - tensor_utils::VectorBatchVectorAssign(output_ptr_batch, num_units, batch_size, - hidden_state_ptr_batch); + // Save quantization and matmul computation for all zero input. + if (!tensor_utils::IsZeroVector(input_ptr_batch, batch_size * input_size)) { + // Quantize input from float to uint8 + quantization params (scaling + // factor). + float unused_min, unused_max; + // TODO(mirkov,raziel): replace this for-loop with a MACRO (or function) + // whichever is faster. + for (int b = 0; b < batch_size; ++b) { + const int offset = b * input_size; + tensor_utils::SymmetricQuantizeFloats( + input_ptr_batch + offset, input_size, + quantized_input_ptr_batch + offset, &unused_min, &unused_max, + &scaling_factors[b]); + scaling_factors[b] *= input_weights_scale; + } + + // Output += input * input_weights + for (int k = 0; k < batch_size; k++) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_weights_ptr, num_units, input_size, + quantized_input_ptr_batch + k * input_size, &scaling_factors[k], + /*n_batch=*/1, output_ptr_batch + k * output_batch_leading_dim, + /*result_stride=*/1); + } + } + + if (aux_input_ptr_batch && + !tensor_utils::IsZeroVector(aux_input_ptr_batch, + batch_size * aux_input_size)) { + float unused_min, unused_max; + for (int b = 0; b < batch_size; ++b) { + const int offset = b * aux_input_size; + tensor_utils::SymmetricQuantizeFloats( + aux_input_ptr_batch + offset, aux_input_size, + aux_quantized_input_ptr_batch + offset, &unused_min, &unused_max, + &scaling_factors[b]); + scaling_factors[b] *= aux_input_weights_scale; + } + + // Output += aux_input * 
aux_input_weights + for (int k = 0; k < batch_size; k++) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + aux_input_weights_ptr, num_units, aux_input_size, + aux_quantized_input_ptr_batch + k * aux_input_size, + &scaling_factors[k], + /*n_batch=*/1, output_ptr_batch + k * output_batch_leading_dim, + /*result_stride=*/1); + } + } + + // Save quantization and matmul computation for all zero input. + if (!tensor_utils::IsZeroVector(hidden_state_ptr_batch, + batch_size * num_units)) { + // Quantize hidden_state + float unused_min, unused_max; + for (int b = 0; b < batch_size; ++b) { + const int offset = b * num_units; + tensor_utils::SymmetricQuantizeFloats( + hidden_state_ptr_batch + offset, num_units, + quantized_hidden_state_ptr_batch + offset, &unused_min, &unused_max, + &scaling_factors[b]); + scaling_factors[b] *= recurrent_weights_scale; + } + + // Output += recurrent_weights * hidden_state + for (int k = 0; k < batch_size; k++) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_weights_ptr, num_units, num_units, + quantized_hidden_state_ptr_batch + k * num_units, + &scaling_factors[k], + /*n_batch=*/1, output_ptr_batch + k * output_batch_leading_dim, + /*result_stride=*/1); + } + } + + // Output = activation(Output) and update hidden_state + for (int k = 0; k < batch_size; k++) { + tensor_utils::ApplyActivationToVector( + output_ptr_batch + k * output_batch_leading_dim, num_units, + activation, output_ptr_batch + k * output_batch_leading_dim); + tensor_utils::CopyVector(output_ptr_batch + k * output_batch_leading_dim, + num_units, + hidden_state_ptr_batch + k * num_units); + } + } } } // namespace kernel_utils diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h index 74e0a4a53dec2cdd0eed73f31d9ab7c4605d3969..0387d753e5abf1a2f4e067f4d046997b39606c87 100644 --- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h +++ 
b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h @@ -29,9 +29,17 @@ namespace kernel_utils { // The pointers with the suffix "_batch" point to data aligned in batch_major // order, and each step processes batch_size many inputs from input_ptr_batch, // and updates batch_size many outputs and hidden states. +// +// The output_batch_leading_dim is output.shape[-1], i.e. the innermost dimension of the +// output tensor, and in most cases will be equal to num_units. It differs +// when we want to store the RNN output into a slice of the output tensor, +// e.g. for bidirectional RNNs with merge_outputs. In this case, the batched +// operations cannot be used since they assume that the batched outputs are +// contiguous, and we manually loop over the batched outputs. void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr, const float* recurrent_weights_ptr, const float* bias_ptr, int input_size, int num_units, int batch_size, + int output_batch_leading_dim, TfLiteFusedActivation activation, float* hidden_state_ptr_batch, float* output_ptr_batch); @@ -41,7 +49,8 @@ void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr, const float* aux_input_weights_ptr, const float* recurrent_weights_ptr, const float* bias_ptr, int input_size, int aux_input_size, int num_units, - int batch_size, TfLiteFusedActivation activation, + int batch_size, int output_batch_leading_dim, + TfLiteFusedActivation activation, float* hidden_state_ptr_batch, float* output_ptr_batch); // Performs a quantized RNN batch inference step. Same as above, but for @@ -54,16 +63,14 @@ void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr, // batch_size) is used to store the scaling factors of the quantization (used // for recovery). // {input,recurrent}_weights_scale params are used for dequantization/recovery.
-void RnnBatchStep(const float* input_ptr_batch, const int8_t* input_weights_ptr, - float input_weights_scale, - const int8_t* recurrent_weights_ptr, - float recurrent_weights_scale, const float* bias_ptr, - int input_size, int num_units, int batch_size, - TfLiteFusedActivation activation, - int8_t* quantized_input_ptr_batch, - int8_t* quantized_hidden_state_ptr_batch, - float* scaling_factors, float* hidden_state_ptr_batch, - float* output_ptr_batch); +void RnnBatchStep( + const float* input_ptr_batch, const int8_t* input_weights_ptr, + float input_weights_scale, const int8_t* recurrent_weights_ptr, + float recurrent_weights_scale, const float* bias_ptr, int input_size, + int num_units, int batch_size, int output_batch_leading_dim, + TfLiteFusedActivation activation, int8_t* quantized_input_ptr_batch, + int8_t* quantized_hidden_state_ptr_batch, float* scaling_factors, + float* hidden_state_ptr_batch, float* output_ptr_batch); void RnnBatchStep( const float* input_ptr_batch, const int8_t* input_weights_ptr, @@ -71,8 +78,9 @@ void RnnBatchStep( const int8_t* aux_input_weights_ptr, float aux_input_weights_scale, const int8_t* recurrent_weights_ptr, float recurrent_weights_scale, const float* bias_ptr, int input_size, int aux_input_size, int num_units, - int batch_size, TfLiteFusedActivation activation, - int8_t* quantized_input_ptr_batch, int8_t* aux_quantized_input_ptr_batch, + int batch_size, int output_batch_leading_dim, + TfLiteFusedActivation activation, int8_t* quantized_input_ptr_batch, + int8_t* aux_quantized_input_ptr_batch, int8_t* quantized_hidden_state_ptr_batch, float* scaling_factors, float* hidden_state_ptr_batch, float* output_ptr_batch); diff --git a/tensorflow/compiler/xla/service/gpu/gpu_options.h b/tensorflow/contrib/lite/kernels/internal/legacy_types.h similarity index 54% rename from tensorflow/compiler/xla/service/gpu/gpu_options.h rename to tensorflow/contrib/lite/kernels/internal/legacy_types.h index 
498d4a94955cb2c50e0b165f28ded44ac1c0bfff..2e4d3137f5c6acfdd7a0942510433323e5fee5ed 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_options.h +++ b/tensorflow/contrib/lite/kernels/internal/legacy_types.h @@ -12,22 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_LEGACY_TYPES_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_LEGACY_TYPES_H_ -#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_ -#define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_ +#include "tensorflow/contrib/lite/kernels/internal/types.h" -#include "tensorflow/compiler/xla/service/hlo_module_config.h" +namespace tflite { -// Helper functions for querying options that are specific to the GPU backend. +// TODO(b/116772710): Insert legacy Dims<> code in here. -namespace xla { -namespace gpu { +} // namespace tflite -// Returns true if we should use heuristics to assign convolution layouts, as -// opposed to always assigning NCHW. 
-bool ConvUseLayoutHeuristic(const HloModuleConfig& config); - -} // namespace gpu -} // namespace xla - -#endif // TENSORFLOW_COMPILER_XLA_SERVICE_GPU_GPU_OPTIONS_H_ +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_LEGACY_TYPES_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h index d8dd7bba897ab84ef81ef6425108577be5f02735..bcadfb2f8cdb1a3301b1f62c6b6d3a964dda0125 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h @@ -841,33 +841,6 @@ inline void FloatDepthwiseConvAccumRowGeneric( int filter_width, const float* filter_data, int out_x_buffer_start, int out_x_buffer_end, int output_depth, float* acc_buffer) { gemmlowp::ScopedProfilingLabel label("DepthwiseConvAccumRowGeneric (slow)"); -#ifdef TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK -#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK - LOG(FATAL) - << "\n\n" - << "*****************************************************************\n" - << "* This tfmini inference code was about to use the slow generic\n" - << "* fallback implementation for a DepthwiseConv op, and we want you\n" - << "* to be aware of that so that you will know why you get terrible\n" - << "* performance.\n" - << "*\n" - << "* If you would like to carry on with the slow code, compile\n" - << "* with this preprocessor token defined:\n" - << "* ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK.\n" - << "*\n" - << "* The right thing to do, if you care about performance, is to add\n" - << "* a new DepthwiseConv kernel to tfmini to cover your case.\n" - << "* The relevant parameters defining your case are:\n" - << "* stride = " << stride << "\n" - << "* input_depth = " << input_depth << "\n" - << "* depth_multiplier = " << depth_multiplier << "\n" - << "* dilation_factor = " << dilation_factor << "\n" - << "*\n" - << "* Please do 
not hesitate to contact benoitjacob@ with this\n" - << "* information.\n" - << "*****************************************************************\n"; -#endif // ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK -#endif // TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK const float* filter_base_ptr = filter_data; for (int filter_x = 0; filter_x < filter_width; ++filter_x) { const int out_x_loop_start = std::max( diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index 803eff292a3f383c1e1cceff7c07d434b0c48587..eff9cab4778a6865f93338489df28707fa3afae2 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -1546,33 +1546,6 @@ inline void QuantizedDepthwiseConvAccumRowGeneric( int16 filter_offset, int out_x_buffer_start, int out_x_buffer_end, int output_depth, int32* acc_buffer) { gemmlowp::ScopedProfilingLabel label("DepthwiseConvAccumRowGeneric (slow)"); -#ifdef TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK -#ifndef ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK - LOG(FATAL) - << "\n\n" - << "*****************************************************************\n" - << "* This tfmini inference code was about to use the slow generic\n" - << "* fallback implementation for a DepthwiseConv op, and we want you\n" - << "* to be aware of that so that you will know why you get terrible\n" - << "* performance.\n" - << "*\n" - << "* If you would like to carry on with the slow code, compile\n" - << "* with this preprocessor token defined:\n" - << "* ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK.\n" - << "*\n" - << "* The right thing to do, if you care about performance, is to add\n" - << "* a new DepthwiseConv kernel to tfmini to cover your case.\n" - << "* The relevant parameters defining your case are:\n" - << "* stride = " << stride << "\n" - << "* input_depth = " << 
input_depth << "\n" - << "* depth_multiplier = " << depth_multiplier << "\n" - << "* dilation_factor = " << dilation_factor << "\n" - << "*\n" - << "* Please do not hesitate to contact benoitjacob@ with this\n" - << "* information.\n" - << "*****************************************************************\n"; -#endif // ALLOW_SLOW_GENERIC_DEPTHWISECONV_FALLBACK -#endif // TFLITE_PREVENT_SLOW_GENERIC_DEPTHWISECONV_FALLBACK const uint8* filter_base_ptr = filter_data; for (int filter_x = 0; filter_x < filter_width; ++filter_x) { const int out_x_loop_start = std::max( diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 77f84e0c1c2b76b5d84324d945d4774876a51031..aa09a0a9d49f2b3e6ef5d8c1b4874389fc6ed45e 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -5674,12 +5674,12 @@ void TransposeIm2col(const ConvParams& params, uint8 zero_byte, const int batches = MatchingDim(input_shape, 0, output_shape, 0); const int input_height = input_shape.Dims(1); const int input_width = input_shape.Dims(2); - const int input_depth = MatchingDim(input_shape, 3, filter_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); const int filter_height = filter_shape.Dims(1); const int filter_width = filter_shape.Dims(2); const int output_height = output_shape.Dims(1); const int output_width = output_shape.Dims(2); - MatchingDim(output_shape, 3, filter_shape, 3); // output_depth + MatchingDim(output_shape, 3, filter_shape, 0); // output_depth // Construct the MxN sized im2col matrix. 
// The rows M, are sub-ordered B x H x W diff --git a/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h index be99240b1f193148791c36c808ddce7a883fb72e..c8b64cfd96798c6cf5e57064f9a9af139fa37471 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h @@ -19,10 +19,10 @@ limitations under the License. #include #include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/legacy_types.h" #include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h" #include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h" #include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" -#include "tensorflow/contrib/lite/kernels/internal/types.h" namespace tflite { @@ -30,6 +30,11 @@ namespace reference_ops { static constexpr int kDepthwiseReverseShift = -1; +inline void ShapeFromDims(const tflite::Dims<4>& dims, RuntimeShape* shape) { + shape->BuildFrom( + {dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]}); +} + inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, const float* filter_data, const Dims<4>& filter_dims, const float* bias_data, const Dims<4>& bias_dims, diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 59f17ae85495173f328421203fa99bd789991f9a..19d23fa80bba9d0533f9730781b1a0499787be36 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -100,11 +100,6 @@ gemmlowp::FixedPoint SaturatingSub( namespace reference_ops { -inline void ShapeFromDims(const tflite::Dims<4>& dims, RuntimeShape* shape) { - shape->BuildFrom( - 
{dims.sizes[3], dims.sizes[2], dims.sizes[1], dims.sizes[0]}); -} - template int CountLeadingZeros(T integer_input) { static_assert(std::is_unsigned::value, diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h index c6bc6074d49c1902e4a6d1984f970e13c4f876a7..a5913143b9ab834d13a3aef3056f68c9e4ad6b76 100644 --- a/tensorflow/contrib/lite/kernels/internal/types.h +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -107,6 +107,11 @@ struct QuantizationParams { double scale = 0.0; }; +inline bool operator==(const QuantizationParams& qp1, + const QuantizationParams& qp2) { + return qp1.zero_point == qp2.zero_point && qp1.scale == qp2.scale; +} + template struct Dims { int sizes[N]; diff --git a/tensorflow/contrib/lite/kernels/layer_norm_lstm.cc b/tensorflow/contrib/lite/kernels/layer_norm_lstm.cc index 9739fd4514bd582fb5c4d8ad6b9cb55a817db80c..48dd03e7ae7e2a8bd4d89e5210a434eccec08030 100644 --- a/tensorflow/contrib/lite/kernels/layer_norm_lstm.cc +++ b/tensorflow/contrib/lite/kernels/layer_norm_lstm.cc @@ -409,9 +409,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/4); scaling_factors->type = kTfLiteFloat32; scaling_factors->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); - scaling_factors_size->data[0] = n_batch; - if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) { + int scaling_dims[1] = {n_batch}; + if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) { + TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); + scaling_factors_size->data[0] = n_batch; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors, scaling_factors_size)); } @@ -420,10 +421,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { GetTemporary(context, node, /*index=*/5); prod_scaling_factors->type = 
kTfLiteFloat32; prod_scaling_factors->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1); - prod_scaling_factors_size->data[0] = n_batch; - if (!TfLiteIntArrayEqual(prod_scaling_factors->dims, - prod_scaling_factors_size)) { + if (!TfLiteIntArrayEqualsArray(prod_scaling_factors->dims, 1, + scaling_dims)) { + TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1); + prod_scaling_factors_size->data[0] = n_batch; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, prod_scaling_factors, prod_scaling_factors_size)); @@ -435,9 +436,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* recovered_weights = GetTemporary(context, node, /*index=*/6); recovered_weights->type = kTfLiteFloat32; recovered_weights->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* recovered_weights_size = TfLiteIntArrayCreate(1); - recovered_weights_size->data[0] = n_cell; - if (!TfLiteIntArrayEqual(recovered_weights->dims, recovered_weights_size)) { + int recovered_dims[1] = {n_cell}; + if (!TfLiteIntArrayEqualsArray(recovered_weights->dims, 1, + recovered_dims)) { + TfLiteIntArray* recovered_weights_size = TfLiteIntArrayCreate(1); + recovered_weights_size->data[0] = n_cell; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, recovered_weights, recovered_weights_size)); diff --git a/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc b/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc index 479f6a7d3c0cdb969f73a83fa28cd1c79940f807..1535f750f94e725061265f76209fbaa213558346 100644 --- a/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc +++ b/tensorflow/contrib/lite/kernels/layer_norm_lstm_test.cc @@ -129,87 +129,85 @@ class LayerNormLSTMOpModel : public SingleOpModel { BuildInterpreter(input_shapes); } - void SetInputToInputWeights(std::initializer_list f) { + void SetInputToInputWeights(std::vector f) { PopulateTensor(input_to_input_weights_, f); } - void 
SetInputToForgetWeights(std::initializer_list f) { + void SetInputToForgetWeights(std::vector f) { PopulateTensor(input_to_forget_weights_, f); } - void SetInputToCellWeights(std::initializer_list f) { + void SetInputToCellWeights(std::vector f) { PopulateTensor(input_to_cell_weights_, f); } - void SetInputToOutputWeights(std::initializer_list f) { + void SetInputToOutputWeights(std::vector f) { PopulateTensor(input_to_output_weights_, f); } - void SetRecurrentToInputWeights(std::initializer_list f) { + void SetRecurrentToInputWeights(std::vector f) { PopulateTensor(recurrent_to_input_weights_, f); } - void SetRecurrentToForgetWeights(std::initializer_list f) { + void SetRecurrentToForgetWeights(std::vector f) { PopulateTensor(recurrent_to_forget_weights_, f); } - void SetRecurrentToCellWeights(std::initializer_list f) { + void SetRecurrentToCellWeights(std::vector f) { PopulateTensor(recurrent_to_cell_weights_, f); } - void SetRecurrentToOutputWeights(std::initializer_list f) { + void SetRecurrentToOutputWeights(std::vector f) { PopulateTensor(recurrent_to_output_weights_, f); } - void SetCellToInputWeights(std::initializer_list f) { + void SetCellToInputWeights(std::vector f) { PopulateTensor(cell_to_input_weights_, f); } - void SetCellToForgetWeights(std::initializer_list f) { + void SetCellToForgetWeights(std::vector f) { PopulateTensor(cell_to_forget_weights_, f); } - void SetCellToOutputWeights(std::initializer_list f) { + void SetCellToOutputWeights(std::vector f) { PopulateTensor(cell_to_output_weights_, f); } - void SetInputLayerNormWeights(std::initializer_list f) { + void SetInputLayerNormWeights(std::vector f) { PopulateTensor(input_layer_norm_weights_, f); } - void SetForgetLayerNormWeights(std::initializer_list f) { + void SetForgetLayerNormWeights(std::vector f) { PopulateTensor(forget_layer_norm_weights_, f); } - void SetCellLayerNormWeights(std::initializer_list f) { + void SetCellLayerNormWeights(std::vector f) { 
PopulateTensor(cell_layer_norm_weights_, f); } - void SetOutputLayerNormWeights(std::initializer_list f) { + void SetOutputLayerNormWeights(std::vector f) { PopulateTensor(output_layer_norm_weights_, f); } - void SetInputGateBias(std::initializer_list f) { + void SetInputGateBias(std::vector f) { PopulateTensor(input_gate_bias_, f); } - void SetForgetGateBias(std::initializer_list f) { + void SetForgetGateBias(std::vector f) { PopulateTensor(forget_gate_bias_, f); } - void SetCellBias(std::initializer_list f) { - PopulateTensor(cell_bias_, f); - } + void SetCellBias(std::vector f) { PopulateTensor(cell_bias_, f); } - void SetOutputGateBias(std::initializer_list f) { + void SetOutputGateBias(std::vector f) { PopulateTensor(output_gate_bias_, f); } - void SetProjectionWeights(std::initializer_list f) { + void SetProjectionWeights(std::vector f) { PopulateTensor(projection_weights_, f); } - void SetProjectionBias(std::initializer_list f) { + void SetProjectionBias(std::vector f) { PopulateTensor(projection_bias_, f); } @@ -278,67 +276,67 @@ class HybridLayerNormLSTMOpModel : public LayerNormLSTMOpModel { use_projection_bias, cell_clip, proj_clip, input_shapes, TensorType_UINT8) {} - void SetInputToInputWeights(std::initializer_list f) { + void SetInputToInputWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_input_weights_, f); } - void SetInputToForgetWeights(std::initializer_list f) { + void SetInputToForgetWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_forget_weights_, f); } - void SetInputToCellWeights(std::initializer_list f) { + void SetInputToCellWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_cell_weights_, f); } - void SetInputToOutputWeights(std::initializer_list f) { + void SetInputToOutputWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_output_weights_, f); } - void SetRecurrentToInputWeights(std::initializer_list f) { + void SetRecurrentToInputWeights(std::vector f) { 
SymmetricQuantizeAndPopulate(recurrent_to_input_weights_, f); } - void SetRecurrentToForgetWeights(std::initializer_list f) { + void SetRecurrentToForgetWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_forget_weights_, f); } - void SetRecurrentToCellWeights(std::initializer_list f) { + void SetRecurrentToCellWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_cell_weights_, f); } - void SetRecurrentToOutputWeights(std::initializer_list f) { + void SetRecurrentToOutputWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_output_weights_, f); } - void SetCellToInputWeights(std::initializer_list f) { + void SetCellToInputWeights(std::vector f) { SymmetricQuantizeAndPopulate(cell_to_input_weights_, f); } - void SetCellToForgetWeights(std::initializer_list f) { + void SetCellToForgetWeights(std::vector f) { SymmetricQuantizeAndPopulate(cell_to_forget_weights_, f); } - void SetCellToOutputWeights(std::initializer_list f) { + void SetCellToOutputWeights(std::vector f) { SymmetricQuantizeAndPopulate(cell_to_output_weights_, f); } - void SetInputLayerNormWeights(std::initializer_list f) { + void SetInputLayerNormWeights(std::vector f) { PopulateTensor(input_layer_norm_weights_, f); } - void SetForgetLayerNormWeights(std::initializer_list f) { + void SetForgetLayerNormWeights(std::vector f) { PopulateTensor(forget_layer_norm_weights_, f); } - void SetCellLayerNormWeights(std::initializer_list f) { + void SetCellLayerNormWeights(std::vector f) { PopulateTensor(cell_layer_norm_weights_, f); } - void SetOutputLayerNormWeights(std::initializer_list f) { + void SetOutputLayerNormWeights(std::vector f) { PopulateTensor(output_layer_norm_weights_, f); } - void SetProjectionWeights(std::initializer_list f) { + void SetProjectionWeights(std::vector f) { SymmetricQuantizeAndPopulate(projection_weights_, f); } }; @@ -346,26 +344,26 @@ class HybridLayerNormLSTMOpModel : public LayerNormLSTMOpModel { class BaseLayerNormLstmTest : public 
::testing::Test { protected: // Weights of the Layer Norm LSTM model. Some are optional. - std::initializer_list input_to_input_weights_; - std::initializer_list input_to_cell_weights_; - std::initializer_list input_to_forget_weights_; - std::initializer_list input_to_output_weights_; - std::initializer_list input_gate_bias_; - std::initializer_list cell_gate_bias_; - std::initializer_list forget_gate_bias_; - std::initializer_list output_gate_bias_; - std::initializer_list recurrent_to_input_weights_; - std::initializer_list recurrent_to_cell_weights_; - std::initializer_list recurrent_to_forget_weights_; - std::initializer_list recurrent_to_output_weights_; - std::initializer_list cell_to_input_weights_; - std::initializer_list cell_to_forget_weights_; - std::initializer_list cell_to_output_weights_; - std::initializer_list input_layer_norm_weights_; - std::initializer_list forget_layer_norm_weights_; - std::initializer_list cell_layer_norm_weights_; - std::initializer_list output_layer_norm_weights_; - std::initializer_list projection_weights_; + std::vector input_to_input_weights_; + std::vector input_to_cell_weights_; + std::vector input_to_forget_weights_; + std::vector input_to_output_weights_; + std::vector input_gate_bias_; + std::vector cell_gate_bias_; + std::vector forget_gate_bias_; + std::vector output_gate_bias_; + std::vector recurrent_to_input_weights_; + std::vector recurrent_to_cell_weights_; + std::vector recurrent_to_forget_weights_; + std::vector recurrent_to_output_weights_; + std::vector cell_to_input_weights_; + std::vector cell_to_forget_weights_; + std::vector cell_to_output_weights_; + std::vector input_layer_norm_weights_; + std::vector forget_layer_norm_weights_; + std::vector cell_layer_norm_weights_; + std::vector output_layer_norm_weights_; + std::vector projection_weights_; // Layer Norm LSTM input is stored as num_batch x num_inputs vector. 
std::vector> layer_norm_lstm_input_; diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc index 16d67a1a938a43f58db7408a4c5b396fef252742..f08a1a80c0579fe58ce086113e68da2fa42b0c3b 100644 --- a/tensorflow/contrib/lite/kernels/lstm.cc +++ b/tensorflow/contrib/lite/kernels/lstm.cc @@ -386,9 +386,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/4); scaling_factors->type = kTfLiteFloat32; scaling_factors->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); - scaling_factors_size->data[0] = n_batch; - if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) { + int scaling_dims[1] = {n_batch}; + if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) { + TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); + scaling_factors_size->data[0] = n_batch; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors, scaling_factors_size)); } @@ -397,10 +398,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { GetTemporary(context, node, /*index=*/5); prod_scaling_factors->type = kTfLiteFloat32; prod_scaling_factors->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1); - prod_scaling_factors_size->data[0] = n_batch; - if (!TfLiteIntArrayEqual(prod_scaling_factors->dims, - prod_scaling_factors_size)) { + if (!TfLiteIntArrayEqualsArray(prod_scaling_factors->dims, 1, + scaling_dims)) { + TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1); + prod_scaling_factors_size->data[0] = n_batch; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, prod_scaling_factors, prod_scaling_factors_size)); @@ -413,10 +414,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { GetTemporary(context, node, /*index=*/6); recovered_cell_weights->type = kTfLiteFloat32; 
recovered_cell_weights->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* recovered_cell_weights_size = TfLiteIntArrayCreate(1); - recovered_cell_weights_size->data[0] = n_cell; - if (!TfLiteIntArrayEqual(recovered_cell_weights->dims, - recovered_cell_weights_size)) { + int recovered_cell_dims[1] = {n_cell}; + if (!TfLiteIntArrayEqualsArray(recovered_cell_weights->dims, 1, + recovered_cell_dims)) { + TfLiteIntArray* recovered_cell_weights_size = TfLiteIntArrayCreate(1); + recovered_cell_weights_size->data[0] = n_cell; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, recovered_cell_weights, recovered_cell_weights_size)); diff --git a/tensorflow/contrib/lite/kernels/lstm_eval.cc b/tensorflow/contrib/lite/kernels/lstm_eval.cc index 20a4e3000985cfddfaa59e48a229f8473aa033aa..dc0a0b455b1685d956ed91e7ff131fceaf8786bf 100644 --- a/tensorflow/contrib/lite/kernels/lstm_eval.cc +++ b/tensorflow/contrib/lite/kernels/lstm_eval.cc @@ -36,12 +36,20 @@ namespace { // - n_cell: number of cells (or units), // - n_input: the input size, // - n_output: the output size. +// - output_batch_leading_dim: the leading dimension of the output buffer. // // The pointers to the cell and output state and the output are updated. // // The pointers with the suffix "_batch" point to data aligned in batch_major // order, and each step processes batch_size many inputs from input_ptr_batch, // and updates batch_size many cell and output states. +// +// The output_batch_dim is output.shape[-1], i.e. the outermost dimension of the +// output tensor, and in most cases will be equal to n_output. It is usually not +// when we want to store the LSTM output into a slice of the output tensor, e.g. +// for bidirectional LSTMs with merge_outputs. In this case, the batched +// operations cannot be used since they assume that the batched outputs are +// contiguous, and we manually loop over the batched outputs. 
inline void LstmStepWithAuxInput( const float* input_ptr_batch, const float* input_to_input_weights_ptr, const float* input_to_forget_weights_ptr, @@ -62,7 +70,8 @@ inline void LstmStepWithAuxInput( const float* output_gate_bias_ptr, const float* projection_weights_ptr, const float* projection_bias_ptr, const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input, int n_aux_input, int n_output, - float* output_state_ptr, float* cell_state_ptr, float* input_gate_scratch, + int output_batch_leading_dim, float* output_state_ptr, + float* cell_state_ptr, float* input_gate_scratch, float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch, float* output_ptr_batch) { // Since we have already checked that weights are all there or none, we can @@ -188,29 +197,72 @@ inline void LstmStepWithAuxInput( tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch, n_batch * n_cell, output_gate_scratch); - // For each batch: update the projection and output_state. const bool use_projection_weight = (projection_weights_ptr != nullptr); const bool use_projection_bias = (projection_bias_ptr != nullptr); - if (use_projection_weight) { - if (use_projection_bias) { - tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output, - n_batch, output_ptr_batch); + + // For each batch: update the projection and output_state. Note that since + // the output batch rows may not be contiguous (output_batch_leading_dim != + // n_output), we unroll the batched operations where this is the case. 
+ if (output_batch_leading_dim == n_output) { + if (use_projection_weight) { + if (use_projection_bias) { + tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output, + n_batch, output_ptr_batch); + } else { + tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output); + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + projection_weights_ptr, n_output, n_cell, output_gate_scratch, + n_batch, output_ptr_batch, /*result_stride=*/1); + if (params->proj_clip > 0.0) { + tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output, + params->proj_clip, output_ptr_batch); + } } else { - tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - projection_weights_ptr, n_output, n_cell, output_gate_scratch, n_batch, - output_ptr_batch, /*result_stride=*/1); - if (params->proj_clip > 0.0) { - tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output, - params->proj_clip, output_ptr_batch); + tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, + output_ptr_batch); } + tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output, + output_state_ptr); } else { - tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, - output_ptr_batch); + if (use_projection_weight) { + if (use_projection_bias) { + for (int k = 0; k < n_batch; k++) { + tensor_utils::CopyVector( + projection_bias_ptr, n_output, + output_ptr_batch + k * output_batch_leading_dim); + } + } else { + for (int k = 0; k < n_batch; k++) { + tensor_utils::ZeroVector( + output_ptr_batch + k * output_batch_leading_dim, n_output); + } + } + for (int k = 0; k < n_batch; k++) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + projection_weights_ptr, n_output, n_cell, + output_gate_scratch + k * n_cell, + /*n_batch=*/1, output_ptr_batch + k * output_batch_leading_dim, + /*result_stride=*/1); + if (params->proj_clip > 0.0) { + tensor_utils::ClipVector( + output_ptr_batch + k * output_batch_leading_dim, 
n_output, + params->proj_clip, + output_ptr_batch + k * output_batch_leading_dim); + } + } + } else { + for (int k = 0; k < n_batch; k++) { + tensor_utils::CopyVector( + output_gate_scratch + k * n_output, n_output, + output_ptr_batch + k * output_batch_leading_dim); + } + } + for (int k = 0; k < n_batch; k++) { + tensor_utils::CopyVector(output_ptr_batch + k * output_batch_leading_dim, + n_output, output_state_ptr + k * n_output); + } } - tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output, - output_state_ptr); } // Same as above but with quantized weight matrices. In detail: @@ -263,7 +315,7 @@ inline void LstmStepWithAuxInput( // Outputs: // output_state_ptr - size 'n_batch * n_output' // cell_state_ptr - size 'n_batch * n_cell' -// output_ptr_batch - size 'n_batch * n_output' +// output_ptr_batch - size 'n_batch * output_batch_leading_dim' inline void LstmStepWithAuxInput( const float* input_ptr_batch, const int8_t* input_to_input_weights_ptr, float input_to_input_weights_scale, @@ -297,13 +349,13 @@ inline void LstmStepWithAuxInput( const float* output_gate_bias_ptr, const int8_t* projection_weights_ptr, float projection_weights_scale, const float* projection_bias_ptr, const TfLiteLSTMParams* params, int n_batch, int n_cell, int n_input, - int n_aux_input, int n_output, float* input_gate_scratch, - float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch, - float* scaling_factors, float* product_scaling_factors, - float* recovered_cell_weights, int8_t* quantized_input_ptr_batch, - int8_t* quantized_aux_input_ptr_batch, int8_t* quantized_output_state_ptr, - int8_t* quantized_cell_state_ptr, float* output_state_ptr, - float* cell_state_ptr, float* output_ptr_batch) { + int n_aux_input, int n_output, int output_batch_leading_dim, + float* input_gate_scratch, float* forget_gate_scratch, float* cell_scratch, + float* output_gate_scratch, float* scaling_factors, + float* product_scaling_factors, float* recovered_cell_weights, + int8_t* 
quantized_input_ptr_batch, int8_t* quantized_aux_input_ptr_batch, + int8_t* quantized_output_state_ptr, int8_t* quantized_cell_state_ptr, + float* output_state_ptr, float* cell_state_ptr, float* output_ptr_batch) { // Since we have already checked that weights are all there or none, we // can check the existense of only one to the get the condition. const bool use_cifg = (input_to_input_weights_ptr == nullptr); @@ -536,45 +588,106 @@ inline void LstmStepWithAuxInput( tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch, n_batch * n_cell, output_gate_scratch); - // For each batch: update the projection and output_state. const bool use_projection_weight = (projection_weights_ptr != nullptr); const bool use_projection_bias = (projection_bias_ptr != nullptr); - if (use_projection_weight) { - if (use_projection_bias) { - tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output, - n_batch, output_ptr_batch); + + // For each batch: update the projection and output_state. Note that since + // the output batch rows may not be contiguous (output_batch_leading_dim != + // n_output), we unroll the batched operations where this is the case. + if (output_batch_leading_dim == n_output) { + if (use_projection_weight) { + if (use_projection_bias) { + tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output, + n_batch, output_ptr_batch); + } else { + tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output); + } + if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) { + // Save quantization and matmul computation for all zero input. 
+ float unused_min, unused_max; + for (int b = 0; b < n_batch; ++b) { + const int offset = b * n_cell; + tensor_utils::SymmetricQuantizeFloats( + output_gate_scratch + offset, n_cell, + quantized_cell_state_ptr + offset, &unused_min, &unused_max, + &scaling_factors[b]); + } + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * projection_weights_scale; + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + projection_weights_ptr, n_output, n_cell, quantized_cell_state_ptr, + product_scaling_factors, n_batch, output_ptr_batch, + /*result_stride=*/1); + } + if (params->proj_clip > 0.0) { + tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output, + params->proj_clip, output_ptr_batch); + } } else { - tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output); + tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, + output_ptr_batch); } - if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) { - // Save quantization and matmul computation for all zero input. 
- float unused_min, unused_max; - for (int b = 0; b < n_batch; ++b) { - const int offset = b * n_cell; - tensor_utils::SymmetricQuantizeFloats( - output_gate_scratch + offset, n_cell, - quantized_cell_state_ptr + offset, &unused_min, &unused_max, - &scaling_factors[b]); + tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output, + output_state_ptr); + } else { + if (use_projection_weight) { + if (use_projection_bias) { + for (int k = 0; k < n_batch; k++) { + tensor_utils::CopyVector( + projection_bias_ptr, n_output, + output_ptr_batch + k * output_batch_leading_dim); + } + } else { + for (int k = 0; k < n_batch; k++) { + tensor_utils::ZeroVector( + output_ptr_batch + k * output_batch_leading_dim, n_output); + } } - for (int b = 0; b < n_batch; ++b) { - product_scaling_factors[b] = - scaling_factors[b] * projection_weights_scale; + if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) { + // Save quantization and matmul computation for all zero input. + float unused_min, unused_max; + for (int b = 0; b < n_batch; ++b) { + const int offset = b * n_cell; + tensor_utils::SymmetricQuantizeFloats( + output_gate_scratch + offset, n_cell, + quantized_cell_state_ptr + offset, &unused_min, &unused_max, + &scaling_factors[b]); + } + for (int b = 0; b < n_batch; ++b) { + product_scaling_factors[b] = + scaling_factors[b] * projection_weights_scale; + } + for (int k = 0; k < n_batch; k++) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + projection_weights_ptr, n_output, n_cell, + quantized_cell_state_ptr + k * n_cell, + &product_scaling_factors[k], + /*n_batch=*/1, output_ptr_batch + k * output_batch_leading_dim, + /*result_stride=*/1); + } + } + if (params->proj_clip > 0.0) { + for (int k = 0; k < n_batch; k++) { + tensor_utils::ClipVector( + output_ptr_batch + k * output_batch_leading_dim, n_output, + params->proj_clip, + output_ptr_batch + k * output_batch_leading_dim); + } + } + } else { + for (int k = 0; k < n_batch; k++) { + 
tensor_utils::CopyVector( + output_gate_scratch + k * n_output, n_output, + output_ptr_batch + k * output_batch_leading_dim); } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - projection_weights_ptr, n_output, n_cell, quantized_cell_state_ptr, - product_scaling_factors, n_batch, output_ptr_batch, - /*result_stride=*/1); } - if (params->proj_clip > 0.0) { - tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output, - params->proj_clip, output_ptr_batch); + for (int k = 0; k < n_batch; k++) { + tensor_utils::CopyVector(output_ptr_batch + k * output_batch_leading_dim, + n_output, output_state_ptr + k * n_output); } - } else { - tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, - output_ptr_batch); } - tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output, - output_state_ptr); } } // namespace @@ -657,15 +770,19 @@ TfLiteStatus EvalFloat( float* aux_input_to_output_weights_ptr = nullptr; if (aux_input_size > 0) { aux_input_ptr = aux_input->data.f; - aux_input_to_input_weights_ptr = aux_input_to_input_weights->data.f; + if (!use_cifg) { + aux_input_to_input_weights_ptr = aux_input_to_input_weights->data.f; + } aux_input_to_forget_weights_ptr = aux_input_to_forget_weights->data.f; aux_input_to_cell_weights_ptr = aux_input_to_cell_weights->data.f; aux_input_to_output_weights_ptr = aux_input_to_output_weights->data.f; } // Loop through the sequence. + const int output_batch_leading_dim = + output->dims->data[output->dims->size - 1]; const int input_step = n_batch * n_input; - const int output_step = n_batch * output->dims->data[output->dims->size - 1]; + const int output_step = n_batch * output_batch_leading_dim; for (int t = 0; t < max_time; t++) { // If this is the forward_sequence, step forward, otherwise step backwards. const int t_rel = forward_sequence ? 
t : max_time - t - 1; @@ -685,9 +802,9 @@ TfLiteStatus EvalFloat( input_gate_bias_ptr, forget_gate_bias->data.f, cell_bias->data.f, output_gate_bias->data.f, projection_weights_ptr, projection_bias_ptr, params, n_batch, n_cell, n_input, aux_input_size, n_output, - activation_state->data.f, cell_state->data.f, input_gate_scratch, - forget_gate_scratch, cell_scratch, output_gate_scratch, - output_ptr_time); + output_batch_leading_dim, activation_state->data.f, cell_state->data.f, + input_gate_scratch, forget_gate_scratch, cell_scratch, + output_gate_scratch, output_ptr_time); } return kTfLiteOk; } @@ -851,15 +968,20 @@ TfLiteStatus EvalHybrid( float aux_input_to_output_weights_scale = 0.0f; if (aux_input_size > 0) { aux_input_ptr = aux_input->data.f; - aux_input_to_input_weights_ptr = - reinterpret_cast(aux_input_to_input_weights->data.uint8); + if (!use_cifg) { + aux_input_to_input_weights_ptr = + reinterpret_cast(aux_input_to_input_weights->data.uint8); + } aux_input_to_forget_weights_ptr = reinterpret_cast(aux_input_to_forget_weights->data.uint8); aux_input_to_cell_weights_ptr = reinterpret_cast(aux_input_to_cell_weights->data.uint8); aux_input_to_output_weights_ptr = reinterpret_cast(aux_input_to_output_weights->data.uint8); - aux_input_to_input_weights_scale = aux_input_to_input_weights->params.scale; + if (!use_cifg) { + aux_input_to_input_weights_scale = + aux_input_to_input_weights->params.scale; + } aux_input_to_forget_weights_scale = aux_input_to_forget_weights->params.scale; aux_input_to_cell_weights_scale = aux_input_to_cell_weights->params.scale; @@ -868,8 +990,10 @@ TfLiteStatus EvalHybrid( } // Feed the sequence into the LSTM step-by-step. 
+ const int output_batch_leading_dim = + output->dims->data[output->dims->size - 1]; const int input_step = n_batch * n_input; - const int output_step = n_batch * output->dims->data[output->dims->size - 1]; + const int output_step = n_batch * output_batch_leading_dim; for (int t = 0; t < max_time; t++) { // If this is the forward_sequence, step forward, otherwise step backwards. const int t_rel = forward_sequence ? t : max_time - t - 1; @@ -895,9 +1019,9 @@ TfLiteStatus EvalHybrid( cell_to_output_weights_scale, input_gate_bias_ptr, forget_gate_bias_ptr, cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr, projection_weights_scale, projection_bias_ptr, params, n_batch, n_cell, - n_input, aux_input_size, n_output, input_gate_scratch, - forget_gate_scratch, cell_scratch, output_gate_scratch, - scaling_factors_ptr, prod_scaling_factors_ptr, + n_input, aux_input_size, n_output, output_batch_leading_dim, + input_gate_scratch, forget_gate_scratch, cell_scratch, + output_gate_scratch, scaling_factors_ptr, prod_scaling_factors_ptr, recovered_cell_weights_ptr, quantized_input_ptr, quantized_aux_input_ptr, quantized_output_state_ptr, quantized_cell_state_ptr, output_state_ptr, cell_state_ptr, output_ptr); diff --git a/tensorflow/contrib/lite/kernels/lstm_test.cc b/tensorflow/contrib/lite/kernels/lstm_test.cc index e7ddfceb4527c4c32cece224e9b155db4ff0ea4f..f8947db724217421aba637ae884bedb5d412b2db 100644 --- a/tensorflow/contrib/lite/kernels/lstm_test.cc +++ b/tensorflow/contrib/lite/kernels/lstm_test.cc @@ -116,71 +116,69 @@ class LSTMOpModel : public SingleOpModel { BuildInterpreter(input_shapes); } - void SetInputToInputWeights(std::initializer_list f) { + void SetInputToInputWeights(std::vector f) { PopulateTensor(input_to_input_weights_, f); } - void SetInputToForgetWeights(std::initializer_list f) { + void SetInputToForgetWeights(std::vector f) { PopulateTensor(input_to_forget_weights_, f); } - void SetInputToCellWeights(std::initializer_list f) { + void 
SetInputToCellWeights(std::vector f) { PopulateTensor(input_to_cell_weights_, f); } - void SetInputToOutputWeights(std::initializer_list f) { + void SetInputToOutputWeights(std::vector f) { PopulateTensor(input_to_output_weights_, f); } - void SetRecurrentToInputWeights(std::initializer_list f) { + void SetRecurrentToInputWeights(std::vector f) { PopulateTensor(recurrent_to_input_weights_, f); } - void SetRecurrentToForgetWeights(std::initializer_list f) { + void SetRecurrentToForgetWeights(std::vector f) { PopulateTensor(recurrent_to_forget_weights_, f); } - void SetRecurrentToCellWeights(std::initializer_list f) { + void SetRecurrentToCellWeights(std::vector f) { PopulateTensor(recurrent_to_cell_weights_, f); } - void SetRecurrentToOutputWeights(std::initializer_list f) { + void SetRecurrentToOutputWeights(std::vector f) { PopulateTensor(recurrent_to_output_weights_, f); } - void SetCellToInputWeights(std::initializer_list f) { + void SetCellToInputWeights(std::vector f) { PopulateTensor(cell_to_input_weights_, f); } - void SetCellToForgetWeights(std::initializer_list f) { + void SetCellToForgetWeights(std::vector f) { PopulateTensor(cell_to_forget_weights_, f); } - void SetCellToOutputWeights(std::initializer_list f) { + void SetCellToOutputWeights(std::vector f) { PopulateTensor(cell_to_output_weights_, f); } - void SetInputGateBias(std::initializer_list f) { + void SetInputGateBias(std::vector f) { PopulateTensor(input_gate_bias_, f); } - void SetForgetGateBias(std::initializer_list f) { + void SetForgetGateBias(std::vector f) { PopulateTensor(forget_gate_bias_, f); } - void SetCellBias(std::initializer_list f) { - PopulateTensor(cell_bias_, f); - } + void SetCellBias(std::vector f) { PopulateTensor(cell_bias_, f); } - void SetOutputGateBias(std::initializer_list f) { + void SetOutputGateBias(std::vector f) { PopulateTensor(output_gate_bias_, f); } - void SetProjectionWeights(std::initializer_list f) { + void SetProjectionWeights(std::vector f) { 
PopulateTensor(projection_weights_, f); } - void SetProjectionBias(std::initializer_list f) { + void SetProjectionBias(std::vector f) { PopulateTensor(projection_bias_, f); } @@ -243,51 +241,51 @@ class HybridLSTMOpModel : public LSTMOpModel { use_projection_weights, use_projection_bias, cell_clip, proj_clip, input_shapes, TensorType_UINT8) {} - void SetInputToInputWeights(std::initializer_list f) { + void SetInputToInputWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_input_weights_, f); } - void SetInputToForgetWeights(std::initializer_list f) { + void SetInputToForgetWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_forget_weights_, f); } - void SetInputToCellWeights(std::initializer_list f) { + void SetInputToCellWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_cell_weights_, f); } - void SetInputToOutputWeights(std::initializer_list f) { + void SetInputToOutputWeights(std::vector f) { SymmetricQuantizeAndPopulate(input_to_output_weights_, f); } - void SetRecurrentToInputWeights(std::initializer_list f) { + void SetRecurrentToInputWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_input_weights_, f); } - void SetRecurrentToForgetWeights(std::initializer_list f) { + void SetRecurrentToForgetWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_forget_weights_, f); } - void SetRecurrentToCellWeights(std::initializer_list f) { + void SetRecurrentToCellWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_cell_weights_, f); } - void SetRecurrentToOutputWeights(std::initializer_list f) { + void SetRecurrentToOutputWeights(std::vector f) { SymmetricQuantizeAndPopulate(recurrent_to_output_weights_, f); } - void SetCellToInputWeights(std::initializer_list f) { + void SetCellToInputWeights(std::vector f) { SymmetricQuantizeAndPopulate(cell_to_input_weights_, f); } - void SetCellToForgetWeights(std::initializer_list f) { + void SetCellToForgetWeights(std::vector f) { 
SymmetricQuantizeAndPopulate(cell_to_forget_weights_, f); } - void SetCellToOutputWeights(std::initializer_list f) { + void SetCellToOutputWeights(std::vector f) { SymmetricQuantizeAndPopulate(cell_to_output_weights_, f); } - void SetProjectionWeights(std::initializer_list f) { + void SetProjectionWeights(std::vector f) { SymmetricQuantizeAndPopulate(projection_weights_, f); } }; @@ -295,22 +293,22 @@ class HybridLSTMOpModel : public LSTMOpModel { class BaseLstmTest : public ::testing::Test { protected: // Weights of the LSTM model. Some are optional. - std::initializer_list input_to_input_weights_; - std::initializer_list input_to_cell_weights_; - std::initializer_list input_to_forget_weights_; - std::initializer_list input_to_output_weights_; - std::initializer_list input_gate_bias_; - std::initializer_list cell_gate_bias_; - std::initializer_list forget_gate_bias_; - std::initializer_list output_gate_bias_; - std::initializer_list recurrent_to_input_weights_; - std::initializer_list recurrent_to_cell_weights_; - std::initializer_list recurrent_to_forget_weights_; - std::initializer_list recurrent_to_output_weights_; - std::initializer_list cell_to_input_weights_; - std::initializer_list cell_to_forget_weights_; - std::initializer_list cell_to_output_weights_; - std::initializer_list projection_weights_; + std::vector input_to_input_weights_; + std::vector input_to_cell_weights_; + std::vector input_to_forget_weights_; + std::vector input_to_output_weights_; + std::vector input_gate_bias_; + std::vector cell_gate_bias_; + std::vector forget_gate_bias_; + std::vector output_gate_bias_; + std::vector recurrent_to_input_weights_; + std::vector recurrent_to_cell_weights_; + std::vector recurrent_to_forget_weights_; + std::vector recurrent_to_output_weights_; + std::vector cell_to_input_weights_; + std::vector cell_to_forget_weights_; + std::vector cell_to_output_weights_; + std::vector projection_weights_; // LSTM input is stored as num_batch x num_inputs vector. 
std::vector> lstm_input_; diff --git a/tensorflow/contrib/lite/kernels/mul_test.cc b/tensorflow/contrib/lite/kernels/mul_test.cc index 2807550a6b07f3f9f1f1e3f72acc9882c76d166a..0f9c0c2eee51e7ca2c70aa58edf38d1cc35363f2 100644 --- a/tensorflow/contrib/lite/kernels/mul_test.cc +++ b/tensorflow/contrib/lite/kernels/mul_test.cc @@ -107,7 +107,7 @@ TEST(FloatMulOpTest, ActivationRELU_N1_TO_1) { } TEST(FloatMulOpTest, VariousInputShapes) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { FloatMulOpModel m({TensorType_FLOAT32, test_shapes[i]}, @@ -124,7 +124,7 @@ TEST(FloatMulOpTest, VariousInputShapes) { } TEST(FloatMulOpTest, WithBroadcast) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { FloatMulOpModel m({TensorType_FLOAT32, test_shapes[i]}, @@ -161,7 +161,7 @@ TEST(IntegerMulOpTest, ActivationRELU_N1_TO_1) { } TEST(IntegerMulOpTest, VariousInputShapes) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { IntegerMulOpModel m({TensorType_INT32, test_shapes[i]}, @@ -176,7 +176,7 @@ TEST(IntegerMulOpTest, VariousInputShapes) { } TEST(IntegerMulOpTest, WithBroadcast) { - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { IntegerMulOpModel m({TensorType_INT32, test_shapes[i]}, @@ -245,7 +245,7 @@ float GetTolerance(int min, int max) { TEST(QuantizedMulOpTest, WithBroadcast) { float kQuantizedTolerance = GetTolerance(-3.0, 3.0); - std::vector> test_shapes = { + std::vector> test_shapes = { {6}, {2, 3}, {2, 1, 3}, {1, 3, 1, 2}}; for (int i = 0; i < test_shapes.size(); ++i) { QuantizedMulOpModel m({TensorType_UINT8, test_shapes[i], -3.0, 3.0}, diff --git 
a/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc index 843ed0768c050006bdbe759520f453b58798a6a2..226bba2d47769d556bc9d465b223a771f09d9899 100644 --- a/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc +++ b/tensorflow/contrib/lite/kernels/sparse_output_fully_connected.cc @@ -88,6 +88,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { const bool is_hybrid_op = (weights->type == kTfLiteUInt8 && input->type == kTfLiteFloat32); + // Resize output. + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + TfLiteIntArray* output_size_array = TfLiteIntArrayCreate(1); + output_size_array->data[0] = 1; + TF_LITE_ENSURE_OK(context, + context->ResizeTensor(context, output, output_size_array)); + if (is_hybrid_op) { TfLiteIntArrayFree(node->temporaries); node->temporaries = TfLiteIntArrayCreate(kNumTemporaryTensors); diff --git a/tensorflow/contrib/lite/kernels/svdf.cc b/tensorflow/contrib/lite/kernels/svdf.cc index 9903fd5c35794238912fb8e23f7da9314d43a08b..e133f37c0c9399504da92e9a325d8e81f44371f9 100644 --- a/tensorflow/contrib/lite/kernels/svdf.cc +++ b/tensorflow/contrib/lite/kernels/svdf.cc @@ -216,9 +216,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/2); scaling_factors->type = kTfLiteFloat32; scaling_factors->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); - scaling_factors_size->data[0] = batch_size; - if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) { + int scaling_dims[1] = {batch_size}; + if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) { + TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); + scaling_factors_size->data[0] = batch_size; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors, scaling_factors_size)); } diff 
--git a/tensorflow/contrib/lite/kernels/test_util.h b/tensorflow/contrib/lite/kernels/test_util.h index 84deb0e0e8d618bbc2ab19921bf04919427e5e51..670120219ff4e17e72276a34c98ef4ceb142c361 100644 --- a/tensorflow/contrib/lite/kernels/test_util.h +++ b/tensorflow/contrib/lite/kernels/test_util.h @@ -76,7 +76,7 @@ inline std::vector Dequantize(const std::vector& data, float scale, // A helper struct to construct test tensors. This is particularly useful for // quantized tensor which must have their scale and zero_point defined before // the actual data is known. This mimics what happens in practice: quantization -// parameters are calculate during training. +// parameters are calculated during training. struct TensorData { TensorType type; std::vector shape; diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc index ec9cf38b831c22fea97dcf7ea52b56d8efd19c43..40029779e0e1183e74ff529ccaa5f99d28e02f90 100644 --- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc +++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc @@ -389,9 +389,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { GetTemporary(context, node, kScalingFactors); scaling_factors->type = kTfLiteFloat32; scaling_factors->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); - scaling_factors_size->data[0] = n_batch; - if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) { + int scaling_dims[1] = {n_batch}; + if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) { + TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); + scaling_factors_size->data[0] = n_batch; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors, scaling_factors_size)); } @@ -401,10 +402,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { GetTemporary(context, node, 
kProductScalingFactors); prod_scaling_factors->type = kTfLiteFloat32; prod_scaling_factors->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1); - prod_scaling_factors_size->data[0] = n_batch; - if (!TfLiteIntArrayEqual(prod_scaling_factors->dims, - prod_scaling_factors_size)) { + if (!TfLiteIntArrayEqualsArray(prod_scaling_factors->dims, 1, + scaling_dims)) { + TfLiteIntArray* prod_scaling_factors_size = TfLiteIntArrayCreate(1); + prod_scaling_factors_size->data[0] = n_batch; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, prod_scaling_factors, prod_scaling_factors_size)); @@ -418,10 +419,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { GetTemporary(context, node, kRecoveredCellWeights); recovered_cell_weights->type = kTfLiteFloat32; recovered_cell_weights->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* recovered_cell_weights_size = TfLiteIntArrayCreate(1); - recovered_cell_weights_size->data[0] = n_cell; - if (!TfLiteIntArrayEqual(recovered_cell_weights->dims, - recovered_cell_weights_size)) { + int recovered_cell_dims[1] = {n_cell}; + if (!TfLiteIntArrayEqualsArray(recovered_cell_weights->dims, 1, + recovered_cell_dims)) { + TfLiteIntArray* recovered_cell_weights_size = TfLiteIntArrayCreate(1); + recovered_cell_weights_size->data[0] = n_cell; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, recovered_cell_weights, recovered_cell_weights_size)); @@ -431,7 +433,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - auto* params = reinterpret_cast(node->builtin_data); + const auto* params = + reinterpret_cast( + node->builtin_data); const TfLiteTensor* input = GetInput(context, node, kInputTensor); const TfLiteTensor* input_to_input_weights = @@ -482,6 +486,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + 
// Copy out the LSTM specific params so they can be passed in the function. + TfLiteLSTMParams lstm_params; + lstm_params.activation = params->activation; + lstm_params.cell_clip = params->cell_clip; + lstm_params.proj_clip = params->proj_clip; + switch (input_to_output_weights->type) { case kTfLiteFloat32: { return lstm_eval::EvalFloat( @@ -496,7 +506,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { /*aux_input_to_cell_weights=*/nullptr, /*aux_input_to_output_weights=*/nullptr, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, projection_weights, - projection_bias, params, /*forward_sequence=*/true, + projection_bias, &lstm_params, /*forward_sequence=*/true, /*output_offset=*/0, scratch_buffer, activation_state, cell_state, output); } @@ -523,7 +533,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { /*aux_input_to_cell_weights=*/nullptr, /*aux_input_to_output_weights=*/nullptr, input_gate_bias, forget_gate_bias, cell_bias, output_gate_bias, projection_weights, - projection_bias, params, /*forward_sequence=*/true, + projection_bias, &lstm_params, /*forward_sequence=*/true, /*output_offset=*/0, scratch_buffer, scaling_factors, prod_scaling_factors, recovered_cell_weights, input_quantized, /*aux_input_quantized=*/nullptr, activation_state_quantized, diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc index cd3aac053262c37433c1dafe35f8d2b49c2b76ff..c97b0fdd612497336944c721e0dc72b777594e3e 100644 --- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc +++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm_test.cc @@ -110,11 +110,12 @@ class UnidirectionalLSTMOpModel : public SingleOpModel { output_ = AddOutput(TensorType_FLOAT32); - SetBuiltinOp(BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, - BuiltinOptions_LSTMOptions, - CreateLSTMOptions(builder_, ActivationFunctionType_TANH, - cell_clip, 
proj_clip) - .Union()); + SetBuiltinOp( + BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, + BuiltinOptions_UnidirectionalSequenceLSTMOptions, + CreateUnidirectionalSequenceLSTMOptions( + builder_, ActivationFunctionType_TANH, cell_clip, proj_clip) + .Union()); BuildInterpreter(input_shapes); } diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc index 744ee7c109adcb71dbb1d0dc9604d08354add570..5b9e8dce22024f9de6216f9e198ec05903e68f22 100644 --- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc +++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_rnn.cc @@ -125,9 +125,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/2); scaling_factors->type = kTfLiteFloat32; scaling_factors->allocation_type = kTfLiteArenaRw; - TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); - scaling_factors_size->data[0] = batch_size; - if (!TfLiteIntArrayEqual(scaling_factors->dims, scaling_factors_size)) { + int scaling_dims[1] = {batch_size}; + if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) { + TfLiteIntArray* scaling_factors_size = TfLiteIntArrayCreate(1); + scaling_factors_size->data[0] = batch_size; TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, scaling_factors, scaling_factors_size)); } @@ -166,10 +167,10 @@ TfLiteStatus EvalFloat(const TfLiteTensor* input, input->data.f + s * input_size * batch_size; float* output_ptr_batch = output->data.f + s * num_units * batch_size; - kernel_utils::RnnBatchStep(input_ptr_batch, input_weights_ptr, - recurrent_weights_ptr, bias_ptr, input_size, - num_units, batch_size, params->activation, - hidden_state_ptr_batch, output_ptr_batch); + kernel_utils::RnnBatchStep( + input_ptr_batch, input_weights_ptr, recurrent_weights_ptr, bias_ptr, + input_size, num_units, batch_size, num_units, params->activation, + 
hidden_state_ptr_batch, output_ptr_batch); } } else { // For each batch @@ -185,8 +186,8 @@ TfLiteStatus EvalFloat(const TfLiteTensor* input, kernel_utils::RnnBatchStep( input_ptr_batch, input_weights_ptr, recurrent_weights_ptr, bias_ptr, - input_size, num_units, /*batch_size=*/1, params->activation, - hidden_state_ptr_batch, output_ptr_batch); + input_size, num_units, /*batch_size=*/1, num_units, + params->activation, hidden_state_ptr_batch, output_ptr_batch); } } } @@ -237,8 +238,8 @@ TfLiteStatus EvalHybrid( kernel_utils::RnnBatchStep( input_ptr_batch, input_weights_ptr, input_weights_scale, recurrent_weights_ptr, recurrent_weights_scale, bias_ptr, input_size, - num_units, batch_size, params->activation, quantized_input_ptr, - quantized_hidden_state_ptr, scaling_factors_ptr, + num_units, batch_size, num_units, params->activation, + quantized_input_ptr, quantized_hidden_state_ptr, scaling_factors_ptr, hidden_state_ptr_batch, output_ptr_batch); } } else { @@ -256,8 +257,8 @@ TfLiteStatus EvalHybrid( kernel_utils::RnnBatchStep( input_ptr_batch, input_weights_ptr, input_weights_scale, recurrent_weights_ptr, recurrent_weights_scale, bias_ptr, - input_size, num_units, /*batch_size=*/1, params->activation, - quantized_input_ptr, quantized_hidden_state_ptr, + input_size, num_units, /*batch_size=*/1, num_units, + params->activation, quantized_input_ptr, quantized_hidden_state_ptr, scaling_factors_ptr, hidden_state_ptr_batch, output_ptr_batch); } } diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index d7b109ac1a68ddd83e5c0a38698ad1cdc9715ee4..a8a010be1a18002f030e4a930d9c0d66905d6b75 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -384,6 +384,33 @@ TfLiteStatus InterpreterBuilder::ParseTensors( return status; } +TfLiteStatus InterpreterBuilder::ApplyDelegates(Interpreter* interpreter) { + // TODO(b/117561550): Move flex delegate application to the OpResolver. 
+ if (AcquireFlexDelegate == nullptr) { + return kTfLiteOk; + } + + bool has_flex_op = false; + for (const auto* registration : flatbuffer_op_index_to_registration_) { + if ((registration->builtin_code == BuiltinOperator_CUSTOM) && + IsFlexOp(registration->custom_name)) { + has_flex_op = true; + break; + } + } + + if (!has_flex_op) { + return kTfLiteOk; + } + + if (auto flex_delegate = AcquireFlexDelegate()) { + return interpreter->ModifyGraphWithDelegate(std::move(flex_delegate), + /*allow_dynamic_tensors=*/true); + } + + return kTfLiteOk; +} + TfLiteStatus InterpreterBuilder::operator()( std::unique_ptr* interpreter) { return operator()(interpreter, /*num_threads=*/-1); @@ -466,14 +493,8 @@ TfLiteStatus InterpreterBuilder::operator()( } (**interpreter).SetVariables(std::move(variables)); - // TODO(b/116667551): Only create the flex delegate if the model has flex ops. - if (AcquireFlexDelegate != nullptr) { - if (auto flex_delegate = AcquireFlexDelegate()) { - (**interpreter) - .ModifyGraphWithDelegate(std::move(flex_delegate), - /*allow_dynamic_tensors=*/true); - } - } + if (ApplyDelegates(interpreter->get()) != kTfLiteOk) + return cleanup_and_error(); return kTfLiteOk; } diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h index 6abdfcd079af2416d0e6be1be3a48d21616c342e..9505824dcc933b60b89b9b98adcb1f685278b9da 100644 --- a/tensorflow/contrib/lite/model.h +++ b/tensorflow/contrib/lite/model.h @@ -173,6 +173,7 @@ class InterpreterBuilder { const flatbuffers::Vector>* buffers, const flatbuffers::Vector>* tensors, Interpreter* interpreter); + TfLiteStatus ApplyDelegates(Interpreter* interpreter); const ::tflite::Model* model_; const OpResolver& op_resolver_; diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index f23a0ccb807ca159c380267b1c5a877d24483c97..31f233352000117d00356a9c323b0e933234b189 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ 
b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -679,6 +679,8 @@ TfLiteStatus AddOpsAndParams( case tflite::BuiltinOperator_SQUARE: case tflite::BuiltinOperator_ZEROS_LIKE: case tflite::BuiltinOperator_FILL: + case tflite::BuiltinOperator_FLOOR_MOD: + case tflite::BuiltinOperator_RANGE: logError("Op code %d is currently not delegated to NNAPI", builtin); return kTfLiteError; break; diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py index 1bf42d7551f98250969b33377a5a22446692fe46..de936f1fc606e365c918fc99939e8f8a173ec13a 100644 --- a/tensorflow/contrib/lite/python/convert.py +++ b/tensorflow/contrib/lite/python/convert.py @@ -102,20 +102,34 @@ def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): return _toco_python.TocoConvert( model_flags_str, toco_flags_str, input_data_str) - with _tempfile.NamedTemporaryFile() as fp_toco, \ - _tempfile.NamedTemporaryFile() as fp_model, \ - _tempfile.NamedTemporaryFile() as fp_input, \ - _tempfile.NamedTemporaryFile() as fp_output: - fp_model.write(model_flags_str) - fp_toco.write(toco_flags_str) - fp_input.write(input_data_str) - fp_model.flush() - fp_toco.flush() - fp_input.flush() - + # Windows and TemporaryFile are not that useful together, + # since you cannot have two readers/writers. So we have to + # make the temporaries and close and delete them explicitly. 
+ toco_filename, model_filename, input_filename, output_filename = ( + None, None, None, None) + try: + # Build all input files + with _tempfile.NamedTemporaryFile(delete=False) as fp_toco, \ + _tempfile.NamedTemporaryFile(delete=False) as fp_model, \ + _tempfile.NamedTemporaryFile(delete=False) as fp_input: + toco_filename = fp_toco.name + input_filename = fp_input.name + model_filename = fp_model.name + fp_model.write(model_flags_str) + fp_toco.write(toco_flags_str) + fp_input.write(input_data_str) + fp_model.flush() + fp_toco.flush() + fp_input.flush() + + # Reserve an output file + with _tempfile.NamedTemporaryFile(delete=False) as fp: + output_filename = fp.name + + # Run cmd = [ - _toco_from_proto_bin, fp_model.name, fp_toco.name, fp_input.name, - fp_output.name + _toco_from_proto_bin, model_filename, toco_filename, input_filename, + output_filename ] cmdline = " ".join(cmd) is_windows = _platform.system() == "Windows" @@ -128,11 +142,19 @@ def toco_convert_protos(model_flags_str, toco_flags_str, input_data_str): stdout, stderr = proc.communicate() exitcode = proc.returncode if exitcode == 0: - stuff = fp_output.read() - return stuff + with open(output_filename, "rb") as fp: + return fp.read() else: - raise RuntimeError("TOCO failed see console for info.\n%s\n%s\n" % - (stdout, stderr)) + raise RuntimeError( + "TOCO failed see console for info.\n%s\n%s\n" % (stdout, stderr)) + finally: + # Must manually cleanup files. 
+ for filename in [ + toco_filename, input_filename, model_filename, output_filename]: + try: + _os.unlink(filename) + except (OSError, TypeError): + pass def tensor_name(x): diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index ff8430827c7849408d17d482f6e065d832d8522a..73805b76186674d82e84729d5f20e9934c652825 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -176,6 +176,8 @@ enum BuiltinOperator : byte { SQUARE = 92, ZEROS_LIKE = 93, FILL = 94, + FLOOR_MOD = 95, + RANGE = 96, } // Options for the builtin operators. @@ -250,6 +252,9 @@ union BuiltinOptions { FillOptions, BidirectionalSequenceLSTMOptions, BidirectionalSequenceRNNOptions, + UnidirectionalSequenceLSTMOptions, + FloorModOptions, + RangeOptions, } enum Padding : byte { SAME, VALID } @@ -394,6 +399,13 @@ table LSTMOptions { kernel_type: LSTMKernelType = FULL; } +// An implementation of TensorFlow dynamic_rnn with LSTMCell. +table UnidirectionalSequenceLSTMOptions { + fused_activation_function:ActivationFunctionType; + cell_clip: float; // Optional, 0.0 means no clipping + proj_clip: float; // Optional, 0.0 means no clipping +} + table BidirectionalSequenceLSTMOptions { fused_activation_function:ActivationFunctionType; cell_clip: float; // Optional, 0.0 means no clipping @@ -610,6 +622,12 @@ table ZerosLikeOptions { table FillOptions { } +table FloorModOptions { +} + +table RangeOptions { +} + // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a // builtin, or a string if the operator is custom. 
table OperatorCode { diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index f3cb113c9c58f8189fda9fb4bdcdab05887362c6..4426b7d407d1e473b70d90ca97c1ee2612eda7ee 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -79,6 +79,9 @@ struct LocalResponseNormalizationOptionsT; struct LSTMOptions; struct LSTMOptionsT; +struct UnidirectionalSequenceLSTMOptions; +struct UnidirectionalSequenceLSTMOptionsT; + struct BidirectionalSequenceLSTMOptions; struct BidirectionalSequenceLSTMOptionsT; @@ -238,6 +241,12 @@ struct ZerosLikeOptionsT; struct FillOptions; struct FillOptionsT; +struct FloorModOptions; +struct FloorModOptionsT; + +struct RangeOptions; +struct RangeOptionsT; + struct OperatorCode; struct OperatorCodeT; @@ -398,11 +407,13 @@ enum BuiltinOperator { BuiltinOperator_SQUARE = 92, BuiltinOperator_ZEROS_LIKE = 93, BuiltinOperator_FILL = 94, + BuiltinOperator_FLOOR_MOD = 95, + BuiltinOperator_RANGE = 96, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_FILL + BuiltinOperator_MAX = BuiltinOperator_RANGE }; -inline const BuiltinOperator (&EnumValuesBuiltinOperator())[94] { +inline const BuiltinOperator (&EnumValuesBuiltinOperator())[96] { static const BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -497,7 +508,9 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[94] { BuiltinOperator_REDUCE_ANY, BuiltinOperator_SQUARE, BuiltinOperator_ZEROS_LIKE, - BuiltinOperator_FILL + BuiltinOperator_FILL, + BuiltinOperator_FLOOR_MOD, + BuiltinOperator_RANGE }; return values; } @@ -599,6 +612,8 @@ inline const char * const *EnumNamesBuiltinOperator() { "SQUARE", "ZEROS_LIKE", "FILL", + "FLOOR_MOD", + "RANGE", nullptr }; return names; @@ -681,11 +696,14 @@ enum BuiltinOptions { BuiltinOptions_FillOptions = 68, BuiltinOptions_BidirectionalSequenceLSTMOptions = 69, 
BuiltinOptions_BidirectionalSequenceRNNOptions = 70, + BuiltinOptions_UnidirectionalSequenceLSTMOptions = 71, + BuiltinOptions_FloorModOptions = 72, + BuiltinOptions_RangeOptions = 73, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_BidirectionalSequenceRNNOptions + BuiltinOptions_MAX = BuiltinOptions_RangeOptions }; -inline const BuiltinOptions (&EnumValuesBuiltinOptions())[71] { +inline const BuiltinOptions (&EnumValuesBuiltinOptions())[74] { static const BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -757,7 +775,10 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[71] { BuiltinOptions_ZerosLikeOptions, BuiltinOptions_FillOptions, BuiltinOptions_BidirectionalSequenceLSTMOptions, - BuiltinOptions_BidirectionalSequenceRNNOptions + BuiltinOptions_BidirectionalSequenceRNNOptions, + BuiltinOptions_UnidirectionalSequenceLSTMOptions, + BuiltinOptions_FloorModOptions, + BuiltinOptions_RangeOptions }; return values; } @@ -835,6 +856,9 @@ inline const char * const *EnumNamesBuiltinOptions() { "FillOptions", "BidirectionalSequenceLSTMOptions", "BidirectionalSequenceRNNOptions", + "UnidirectionalSequenceLSTMOptions", + "FloorModOptions", + "RangeOptions", nullptr }; return names; @@ -1129,6 +1153,18 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_BidirectionalSequenceRNNOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_UnidirectionalSequenceLSTMOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_FloorModOptions; +}; + +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_RangeOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -1720,6 +1756,30 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_BidirectionalSequenceRNNOptions ? 
reinterpret_cast(value) : nullptr; } + UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() { + return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ? + reinterpret_cast(value) : nullptr; + } + const UnidirectionalSequenceLSTMOptionsT *AsUnidirectionalSequenceLSTMOptions() const { + return type == BuiltinOptions_UnidirectionalSequenceLSTMOptions ? + reinterpret_cast(value) : nullptr; + } + FloorModOptionsT *AsFloorModOptions() { + return type == BuiltinOptions_FloorModOptions ? + reinterpret_cast(value) : nullptr; + } + const FloorModOptionsT *AsFloorModOptions() const { + return type == BuiltinOptions_FloorModOptions ? + reinterpret_cast(value) : nullptr; + } + RangeOptionsT *AsRangeOptions() { + return type == BuiltinOptions_RangeOptions ? + reinterpret_cast(value) : nullptr; + } + const RangeOptionsT *AsRangeOptions() const { + return type == BuiltinOptions_RangeOptions ? + reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -3469,6 +3529,84 @@ inline flatbuffers::Offset CreateLSTMOptions( flatbuffers::Offset CreateLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const LSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct UnidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable { + typedef UnidirectionalSequenceLSTMOptions TableType; + ActivationFunctionType fused_activation_function; + float cell_clip; + float proj_clip; + UnidirectionalSequenceLSTMOptionsT() + : fused_activation_function(ActivationFunctionType_NONE), + cell_clip(0.0f), + proj_clip(0.0f) { + } +}; + +struct UnidirectionalSequenceLSTMOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef UnidirectionalSequenceLSTMOptionsT NativeTableType; + enum { + VT_FUSED_ACTIVATION_FUNCTION = 4, + VT_CELL_CLIP = 6, + VT_PROJ_CLIP = 8 + }; + ActivationFunctionType fused_activation_function() const { + return 
static_cast(GetField(VT_FUSED_ACTIVATION_FUNCTION, 0)); + } + float cell_clip() const { + return GetField(VT_CELL_CLIP, 0.0f); + } + float proj_clip() const { + return GetField(VT_PROJ_CLIP, 0.0f); + } + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_FUSED_ACTIVATION_FUNCTION) && + VerifyField(verifier, VT_CELL_CLIP) && + VerifyField(verifier, VT_PROJ_CLIP) && + verifier.EndTable(); + } + UnidirectionalSequenceLSTMOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct UnidirectionalSequenceLSTMOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + void add_fused_activation_function(ActivationFunctionType fused_activation_function) { + fbb_.AddElement(UnidirectionalSequenceLSTMOptions::VT_FUSED_ACTIVATION_FUNCTION, static_cast(fused_activation_function), 0); + } + void add_cell_clip(float cell_clip) { + fbb_.AddElement(UnidirectionalSequenceLSTMOptions::VT_CELL_CLIP, cell_clip, 0.0f); + } + void add_proj_clip(float proj_clip) { + fbb_.AddElement(UnidirectionalSequenceLSTMOptions::VT_PROJ_CLIP, proj_clip, 0.0f); + } + explicit UnidirectionalSequenceLSTMOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + UnidirectionalSequenceLSTMOptionsBuilder &operator=(const UnidirectionalSequenceLSTMOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateUnidirectionalSequenceLSTMOptions( + flatbuffers::FlatBufferBuilder &_fbb, + 
ActivationFunctionType fused_activation_function = ActivationFunctionType_NONE, + float cell_clip = 0.0f, + float proj_clip = 0.0f) { + UnidirectionalSequenceLSTMOptionsBuilder builder_(_fbb); + builder_.add_proj_clip(proj_clip); + builder_.add_cell_clip(cell_clip); + builder_.add_fused_activation_function(fused_activation_function); + return builder_.Finish(); +} + +flatbuffers::Offset CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct BidirectionalSequenceLSTMOptionsT : public flatbuffers::NativeTable { typedef BidirectionalSequenceLSTMOptions TableType; ActivationFunctionType fused_activation_function; @@ -6145,6 +6283,86 @@ inline flatbuffers::Offset CreateFillOptions( flatbuffers::Offset CreateFillOptions(flatbuffers::FlatBufferBuilder &_fbb, const FillOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct FloorModOptionsT : public flatbuffers::NativeTable { + typedef FloorModOptions TableType; + FloorModOptionsT() { + } +}; + +struct FloorModOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef FloorModOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + FloorModOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(FloorModOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct FloorModOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit FloorModOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + FloorModOptionsBuilder 
&operator=(const FloorModOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateFloorModOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + FloorModOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + +struct RangeOptionsT : public flatbuffers::NativeTable { + typedef RangeOptions TableType; + RangeOptionsT() { + } +}; + +struct RangeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef RangeOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + RangeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(RangeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct RangeOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit RangeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + RangeOptionsBuilder &operator=(const RangeOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateRangeOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + RangeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct 
OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode TableType; BuiltinOperator builtin_code; @@ -6488,6 +6706,15 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const BidirectionalSequenceRNNOptions *builtin_options_as_BidirectionalSequenceRNNOptions() const { return builtin_options_type() == BuiltinOptions_BidirectionalSequenceRNNOptions ? static_cast(builtin_options()) : nullptr; } + const UnidirectionalSequenceLSTMOptions *builtin_options_as_UnidirectionalSequenceLSTMOptions() const { + return builtin_options_type() == BuiltinOptions_UnidirectionalSequenceLSTMOptions ? static_cast(builtin_options()) : nullptr; + } + const FloorModOptions *builtin_options_as_FloorModOptions() const { + return builtin_options_type() == BuiltinOptions_FloorModOptions ? static_cast(builtin_options()) : nullptr; + } + const RangeOptions *builtin_options_as_RangeOptions() const { + return builtin_options_type() == BuiltinOptions_RangeOptions ? static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -6799,6 +7026,18 @@ template<> inline const BidirectionalSequenceRNNOptions *Operator::builtin_optio return builtin_options_as_BidirectionalSequenceRNNOptions(); } +template<> inline const UnidirectionalSequenceLSTMOptions *Operator::builtin_options_as() const { + return builtin_options_as_UnidirectionalSequenceLSTMOptions(); +} + +template<> inline const FloorModOptions *Operator::builtin_options_as() const { + return builtin_options_as_FloorModOptions(); +} + +template<> inline const RangeOptions *Operator::builtin_options_as() const { + return builtin_options_as_RangeOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -7809,6 +8048,38 @@ inline flatbuffers::Offset CreateLSTMOptions(flatbuffers::FlatBuffe _kernel_type); } +inline UnidirectionalSequenceLSTMOptionsT 
*UnidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new UnidirectionalSequenceLSTMOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void UnidirectionalSequenceLSTMOptions::UnPackTo(UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; + { auto _e = fused_activation_function(); _o->fused_activation_function = _e; }; + { auto _e = cell_clip(); _o->cell_clip = _e; }; + { auto _e = proj_clip(); _o->proj_clip = _e; }; +} + +inline flatbuffers::Offset UnidirectionalSequenceLSTMOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateUnidirectionalSequenceLSTMOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateUnidirectionalSequenceLSTMOptions(flatbuffers::FlatBufferBuilder &_fbb, const UnidirectionalSequenceLSTMOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const UnidirectionalSequenceLSTMOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + auto _fused_activation_function = _o->fused_activation_function; + auto _cell_clip = _o->cell_clip; + auto _proj_clip = _o->proj_clip; + return tflite::CreateUnidirectionalSequenceLSTMOptions( + _fbb, + _fused_activation_function, + _cell_clip, + _proj_clip); +} + inline BidirectionalSequenceLSTMOptionsT *BidirectionalSequenceLSTMOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new BidirectionalSequenceLSTMOptionsT(); UnPackTo(_o, _resolver); @@ -9151,6 +9422,52 @@ inline flatbuffers::Offset CreateFillOptions(flatbuffers::FlatBuffe _fbb); } +inline FloorModOptionsT *FloorModOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto 
_o = new FloorModOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void FloorModOptions::UnPackTo(FloorModOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset FloorModOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateFloorModOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateFloorModOptions(flatbuffers::FlatBufferBuilder &_fbb, const FloorModOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FloorModOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateFloorModOptions( + _fbb); +} + +inline RangeOptionsT *RangeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new RangeOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void RangeOptions::UnPackTo(RangeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset RangeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateRangeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateRangeOptions(flatbuffers::FlatBufferBuilder &_fbb, const RangeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RangeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateRangeOptions( + _fbb); +} + inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new 
OperatorCodeT(); UnPackTo(_o, _resolver); @@ -9620,6 +9937,18 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_FloorModOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case BuiltinOptions_RangeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } } @@ -9918,6 +10247,18 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_FloorModOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } + case BuiltinOptions_RangeOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -10204,6 +10545,18 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateBidirectionalSequenceRNNOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(value); + return CreateUnidirectionalSequenceLSTMOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_FloorModOptions: { + auto ptr = reinterpret_cast(value); + return CreateFloorModOptions(_fbb, ptr, _rehasher).Union(); + } + case BuiltinOptions_RangeOptions: { + auto ptr = reinterpret_cast(value); + return CreateRangeOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -10490,6 +10843,18 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new 
BidirectionalSequenceRNNOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + value = new UnidirectionalSequenceLSTMOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_FloorModOptions: { + value = new FloorModOptionsT(*reinterpret_cast(u.value)); + break; + } + case BuiltinOptions_RangeOptions: { + value = new RangeOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -10847,6 +11212,21 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_UnidirectionalSequenceLSTMOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_FloorModOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } + case BuiltinOptions_RangeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index f0bfec23385151c047aba063cae951334f359222..891d44d2b60c713532d7e0e1b2c347ab891eb718 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -20,8 +20,7 @@ load( name = "zip_test_%s" % test_name, size = "large", srcs = ["generated_examples_zip_test.cc"], - args = [ - ] + select({ + args = args + select({ "//tensorflow:android": [], "//conditions:default": [ "--zip_file_path=$(location :zip_%s)" % test_name, @@ -35,7 +34,7 @@ load( ":zip_%s" % test_name, ], shard_count = 20, - tags = [ + tags = tags + [ "gen_zip_test", "no_oss", "tflite_not_portable_intentional", @@ -61,7 +60,7 @@ load( "//tensorflow/core:android_tensorflow_test_lib", ], }), -) for conversion_mode, test_name in generated_test_models_all()] +) for conversion_mode, test_name, tags, args in generated_test_models_all()] test_suite( name = "generated_zip_tests", @@ -297,6 +296,29 @@ cc_test( ], ) +cc_library( + name = "init_tensorflow", + srcs = [ + 
"init_tensorflow.cc", + ], + hdrs = [ + "init_tensorflow.h", + ], + visibility = [ + "//tensorflow/contrib/lite/java/src/main/native:__subpackages__", + "//tensorflow/contrib/lite/testing:__subpackages__", + "//tensorflow/contrib/lite/tools/benchmark:__subpackages__", + ], + deps = select({ + "//conditions:default": [ + "//tensorflow/core:lib", + ], + "//tensorflow:android": [ + "//tensorflow/core:android_tensorflow_lib", + ], + }), +) + cc_library( name = "tflite_diff_util", srcs = ["tflite_diff_util.cc"], @@ -349,6 +371,7 @@ tf_cc_test( "tflite_not_portable", ], deps = [ + ":init_tensorflow", ":tflite_diff_flags", ":tflite_diff_util", ], @@ -358,37 +381,10 @@ cc_binary( name = "tflite_diff", srcs = ["tflite_diff_example_test.cc"], deps = [ + ":init_tensorflow", ":tflite_diff_flags", ":tflite_diff_util", ], ) -py_binary( - name = "model_coverage_lib", - srcs = ["//tensorflow/contrib/lite/testing:model_coverage/model_coverage_lib.py"], - srcs_version = "PY2AND3", - tags = ["no_pip"], - visibility = ["//tensorflow/contrib/lite:__subpackages__"], - deps = [ - "//tensorflow/contrib/lite/python:lite", - "//tensorflow/python:platform", - ], -) - -py_test( - name = "model_coverage_lib_test", - srcs = ["//tensorflow/contrib/lite/testing:model_coverage/model_coverage_lib_test.py"], - srcs_version = "PY2AND3", - tags = [ - "no_oss", - "no_pip", - "no_windows", - "notap", - ], - deps = [ - ":model_coverage_lib", - "//tensorflow/python:client_testlib", - ], -) - tflite_portable_test_suite() diff --git a/tensorflow/contrib/lite/testing/init_tensorflow.cc b/tensorflow/contrib/lite/testing/init_tensorflow.cc new file mode 100644 index 0000000000000000000000000000000000000000..f3dcf620a20e3043b7d177b1fc94bbb37550e436 --- /dev/null +++ b/tensorflow/contrib/lite/testing/init_tensorflow.cc @@ -0,0 +1,31 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/testing/init_tensorflow.h" + +#include +#include + +#include "tensorflow/core/platform/init_main.h" + +namespace tflite { +void InitTensorFlow() { + static const char* kFakeName = "fake program name"; + int argc = 1; + char* fake_name_copy = strdup(kFakeName); + char** argv = &fake_name_copy; + ::tensorflow::port::InitMain(kFakeName, &argc, &argv); + free(fake_name_copy); +} +} // namespace tflite diff --git a/tensorflow/contrib/lite/testing/init_tensorflow.h b/tensorflow/contrib/lite/testing/init_tensorflow.h new file mode 100644 index 0000000000000000000000000000000000000000..2cc89bbbcade5e9a94c25612257e1c78abf6d1ac --- /dev/null +++ b/tensorflow/contrib/lite/testing/init_tensorflow.h @@ -0,0 +1,26 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_TESTING_INIT_TENSORFLOW_H_ +#define TENSORFLOW_CONTRIB_LITE_TESTING_INIT_TENSORFLOW_H_ + +namespace tflite { + +// Initializes tensorflow's libraries. Note that this simulates an empty +// command line, so flags are not initialized. +void InitTensorFlow(); + +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_TESTING_INIT_TENSORFLOW_H_ diff --git a/tensorflow/contrib/lite/testing/model_coverage/BUILD b/tensorflow/contrib/lite/testing/model_coverage/BUILD new file mode 100644 index 0000000000000000000000000000000000000000..c8359bab064b7c487a5b0e2303e76bc348b11ce1 --- /dev/null +++ b/tensorflow/contrib/lite/testing/model_coverage/BUILD @@ -0,0 +1,33 @@ +package(default_visibility = [ + "//tensorflow/contrib/lite:__subpackages__", +]) + +licenses(["notice"]) # Apache 2.0 + +py_binary( + name = "model_coverage_lib", + srcs = ["model_coverage_lib.py"], + srcs_version = "PY2AND3", + tags = ["no_pip"], + deps = [ + "//tensorflow/contrib/lite/python:lite", + "//tensorflow/python:platform", + ], +) + +py_test( + name = "model_coverage_lib_test", + srcs = ["model_coverage_lib_test.py"], + srcs_version = "PY2AND3", + tags = [ + "manual", + "no_oss", + "no_pip", + "no_windows", + "notap", + ], + deps = [ + ":model_coverage_lib", + "//tensorflow/python:client_testlib", + ], +) diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py index 72029ed03ce72d2bbfcca176edec08f8c86f8dbe..ab29f71138d57381c929d7471c5edb3f87a211be 100644 --- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py +++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py @@ -297,7 +297,7 @@ def test_saved_model(directory, tag_set=None, signature_key=None, **kwargs): compare_models_random_data(tflite_model, tf_eval_func) -def test_keras_model(filename, 
**kwargs): +def test_keras_model(filename, input_arrays=None, input_shapes=None, **kwargs): """Validates the tf.keras model converts to a TFLite model. Converts the tf.keras model to TFLite and checks the accuracy of the model on @@ -305,9 +305,15 @@ def test_keras_model(filename, **kwargs): Args: filename: Full filepath of HDF5 file containing the tf.keras model. + input_arrays: List of input tensors to freeze graph with. + input_shapes: Dict of strings representing input tensor names to list of + integers representing input shapes (e.g., {"foo" : [1, 16, 16, 3]}). + Automatically determined when input shapes is None (e.g., {"foo" : None}). + (default None) **kwargs: Additional arguments to be passed into the converter. """ - converter = _lite.TFLiteConverter.from_keras_model_file(filename) + converter = _lite.TFLiteConverter.from_keras_model_file( + filename, input_arrays=input_arrays, input_shapes=input_shapes) tflite_model = _convert(converter, **kwargs) tf_eval_func = evaluate_keras_model(filename) diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py index e07202b1a671f88d64b0dc5d25f412b8daaea809..6989a9ef01b32e74b2ceb3fef5bc62735d838cf5 100644 --- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py +++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py @@ -86,22 +86,17 @@ class EvaluateFrozenGraph(test.TestCase): def testQuantized(self): filename = self._getQuantizedModel() - model_coverage.test_frozen_graph_quant(filename, ['inputA', 'inputB'], - ['output']) + model_coverage.test_frozen_graph_quant(filename, ['inputA'], ['output']) def testQuantizedInputShapes(self): filename = self._getQuantizedModel() model_coverage.test_frozen_graph_quant( - filename, ['inputA', 'inputB'], ['output'], - input_shapes={ - 'inputA': [33, 33], - 'inputB': [33, 33], - }) + filename, ['inputA'], ['output'], 
input_shapes={'inputA': [33, 33]}) def testQuantizedFlexAll(self): filename = self._getQuantizedModel() model_coverage.test_frozen_graph_quant( - filename, ['inputA', 'inputB'], ['output'], + filename, ['inputA'], ['output'], converter_mode=lite.ConverterMode.TOCO_FLEX_ALL) diff --git a/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc b/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc index f2c49fe389763110279b3dd1e4f13b1522de0460..49696ac76be9c2e7015258fda4753dd91d346846 100644 --- a/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc +++ b/tensorflow/contrib/lite/testing/tflite_diff_example_test.cc @@ -13,10 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include "tensorflow/contrib/lite/testing/init_tensorflow.h" #include "tensorflow/contrib/lite/testing/tflite_diff_flags.h" #include "tensorflow/contrib/lite/testing/tflite_diff_util.h" int main(int argc, char** argv) { + ::tflite::InitTensorFlow(); // For Flex support. + ::tflite::testing::DiffOptions options = ::tflite::testing::ParseTfliteDiffFlags(&argc, argv); if (options.tensorflow_model.empty()) return 1; diff --git a/tensorflow/contrib/lite/toco/README.md b/tensorflow/contrib/lite/toco/README.md index 2db6a627ab59604a99cafe3b38df08b70092d989..91f6f618a376ff4df7c51dfd285152229f4757cc 100644 --- a/tensorflow/contrib/lite/toco/README.md +++ b/tensorflow/contrib/lite/toco/README.md @@ -1,6 +1,6 @@ -# TOCO: TensorFlow Lite Optimizing Converter +# TensorFlow Lite Converter -The TensorFlow Lite Optimizing Converter converts TensorFlow graphs into +The TensorFlow Lite Converter converts TensorFlow graphs into TensorFlow Lite graphs. There are additional usages that are also detailed in the usage documentation. 
@@ -14,9 +14,10 @@ Usage information is given in these documents: ## Where the converter fits in the TensorFlow landscape -Once an application developer has a trained TensorFlow model, TOCO will accept +Once an application developer has a trained TensorFlow model, the TensorFlow +Lite Converter will accept that model and generate a TensorFlow Lite -[FlatBuffer](https://google.github.io/flatbuffers/) file. TOCO currently supports +[FlatBuffer](https://google.github.io/flatbuffers/) file. The converter currently supports [SavedModels](https://www.tensorflow.org/guide/saved_model#using_savedmodel_with_estimators), frozen graphs (models generated via [freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py)), diff --git a/tensorflow/contrib/lite/toco/g3doc/README.md b/tensorflow/contrib/lite/toco/g3doc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2153b6cc6360a7a0e0375600c83b0c0945d3b326 --- /dev/null +++ b/tensorflow/contrib/lite/toco/g3doc/README.md @@ -0,0 +1,3 @@ +# TOCO + +These files have moved to [../../g3doc/tflite_convert](../../g3doc/tflite_convert) diff --git a/tensorflow/contrib/lite/toco/g3doc/toco_landscape.svg b/tensorflow/contrib/lite/toco/g3doc/toco_landscape.svg deleted file mode 100644 index 335debde57a1576ecca97f7a7398811973cc7bcc..0000000000000000000000000000000000000000 --- a/tensorflow/contrib/lite/toco/g3doc/toco_landscape.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc index 310a88484c246b8035aa73b5e04ad677d575e4c4..8a945ac4350f2125fe018b6ae3468e19acb77646 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_expanddims_to_reshape.cc @@ -25,10 +25,13 @@ 
limitations under the License. namespace toco { -bool ConvertExpandDimsToReshape::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertExpandDimsToReshape::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto expand_it = model->operators.begin() + op_index; if (expand_it->get()->type != OperatorType::kExpandDims) { - return false; + return ::tensorflow::Status::OK(); } ExpandDimsOperator* expand_op = static_cast(expand_it->get()); @@ -38,18 +41,18 @@ bool ConvertExpandDimsToReshape::Run(Model* model, std::size_t op_index) { const auto& input_array = model->GetArray(expand_op->inputs[0]); if (!input_array.has_shape()) { // Yield until input dims have been resolved. - return false; + return ::tensorflow::Status::OK(); } const auto& axis_array = model->GetArray(expand_op->inputs[1]); if (!axis_array.has_shape()) { // Yield until input axis array shape has been resolved. - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(RequiredBufferSizeForShape(axis_array.shape()), 1); if (!axis_array.buffer) { // Yield until the input axis array is constant - return false; + return ::tensorflow::Status::OK(); } int axis = axis_array.GetBuffer().data[0]; std::vector reshape_dims(input_array.shape().dims()); @@ -90,7 +93,8 @@ bool ConvertExpandDimsToReshape::Run(Model* model, std::size_t op_index) { CHECK_EQ(expand_it->get(), expand_op); model->operators.erase(expand_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc index e88839be5d43670dec45d3a5da5e1d6b9000ac63..a1510128910d74982e464c73596b0d6db5d3171f 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_pure_conv_to_depthwise.cc 
@@ -24,29 +24,32 @@ limitations under the License. namespace toco { -bool ConvertPureConvToDepthwise::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertPureConvToDepthwise::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto conv_it = model->operators.begin() + op_index; if (conv_it->get()->type != OperatorType::kConv) { - return false; + return ::tensorflow::Status::OK(); } const auto* conv_op = static_cast(conv_it->get()); if (conv_op->stride_width != conv_op->stride_height) { - return false; + return ::tensorflow::Status::OK(); } if ((conv_op->dilation_width_factor != 1) || (conv_op->dilation_height_factor != 1)) { // Depthwise conv does not support dilation - return false; + return ::tensorflow::Status::OK(); } auto& input_array = model->GetArray(conv_op->inputs[0]); if (!input_array.has_shape()) { // Shapes not propagated yet - return false; + return ::tensorflow::Status::OK(); } if (input_array.shape().dims(3) != 1) { // Not a pure convolution: Conv does accumulation across the depth // dimension. - return false; + return ::tensorflow::Status::OK(); } const auto& weights_name = conv_op->inputs[1]; @@ -56,15 +59,15 @@ bool ConvertPureConvToDepthwise::Run(Model* model, std::size_t op_index) { "Not changing %s to DepthwiseConv because the weights is consumed by " "another op.", LogName(*conv_op)); - return false; + return ::tensorflow::Status::OK(); } auto& weights_array = model->GetArray(weights_name); if (!weights_array.buffer) { // Yield until the weights are resolved as a constant array. - return false; + return ::tensorflow::Status::OK(); } if (weights_array.data_type != ArrayDataType::kFloat) { - return false; + return ::tensorflow::Status::OK(); } // At this point we know we have a pure conv. Rewrite it as DepthwiseConv. 
AddMessageF( @@ -112,7 +115,8 @@ bool ConvertPureConvToDepthwise::Run(Model* model, std::size_t op_index) { } *weights_array.mutable_shape()->mutable_dims() = {1, width, height, depth}; weights_buffer.data = depthwise_conv_weights_data; - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc index 0d274fc687c8d42d47ddb5beb4f9c6f39b417097..4a264e1cf1d4fcceb47451ffe01b4e45da39f463 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_reorder_axes.cc @@ -86,9 +86,12 @@ TransposeOperator* CreateTransposeFromReorderAxes( // Converts ReorderAxes into Transpose and Reshape which are compatible with the // TFLite interpreter. -bool ConvertReorderAxes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertReorderAxes::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; auto reorder_it = model->operators.begin() + op_index; - if (reorder_it->get()->type != OperatorType::kReorderAxes) return false; + if (reorder_it->get()->type != OperatorType::kReorderAxes) + return ::tensorflow::Status::OK(); auto* reorder_op = static_cast(reorder_it->get()); CHECK_EQ(reorder_op->inputs.size(), 1); @@ -113,8 +116,9 @@ bool ConvertReorderAxes::Run(Model* model, std::size_t op_index) { // Yield if input array contains constants or if output array size has not // been adjusted to reflect the permutations in ReorderAxes. ReorderAxes will // be merged into a constant array when possible. 
- if (IsConstantParameterArray(*model, constant_input_array_name)) return false; - if (!output_array.has_shape()) return false; + if (IsConstantParameterArray(*model, constant_input_array_name)) + return ::tensorflow::Status::OK(); + if (!output_array.has_shape()) return ::tensorflow::Status::OK(); const auto input_axes_order = reorder_op->input_axes_order; const auto output_axes_order = reorder_op->output_axes_order; @@ -143,7 +147,8 @@ bool ConvertReorderAxes::Run(Model* model, std::size_t op_index) { CHECK_EQ(reorder_it->get(), reorder_op); model->operators.erase(reorder_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc index 81cedb5dad751aacbbb32326db73de386aba282d..a0bd1ed4a4d8a1b7119a614efcdae9b2b122466c 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc @@ -30,10 +30,13 @@ namespace toco { // means that the data layout will never change with this op, just the shape. // By converting these to reshapes once we have run shape propagation we allow // standard reshape optimization transforms to do their magic. 
-bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertSqueezeToReshape::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto squeeze_it = model->operators.begin() + op_index; if (squeeze_it->get()->type != OperatorType::kSqueeze) { - return false; + return ::tensorflow::Status::OK(); } auto squeeze_op = static_cast(squeeze_it->get()); CHECK_EQ(squeeze_op->inputs.size(), 1); @@ -42,16 +45,16 @@ bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) { const auto& input_array = model->GetArray(squeeze_op->inputs[0]); if (!input_array.has_shape()) { // Yield until input dims have been resolved. - return false; + return ::tensorflow::Status::OK(); } if (input_array.shape().dimensions_count() == 0) { // Input array cannot be 0-D. - return false; + return ::tensorflow::Status::OK(); } if (!model->HasArray(squeeze_op->outputs[0]) || !model->GetArray(squeeze_op->outputs[0]).has_shape()) { // Yield until shape propagation has set the output shape for us. - return false; + return ::tensorflow::Status::OK(); } // We use the output shape that has been calculated by shape propagation. @@ -59,7 +62,7 @@ bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) { // Empty shapes will not work as empty data arrays. 
if (output_shape.dimensions_count() == 0) { - return false; + return ::tensorflow::Status::OK(); } auto* reshape_op = new TensorFlowReshapeOperator; @@ -79,7 +82,8 @@ bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) { CHECK_EQ(squeeze_it->get(), squeeze_op); model->operators.erase(squeeze_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc index dcaaddbf3b5409f0fc3ddaf32e23b1e5eefb6565..d7cacf77f48b6b5720885261f3cb701d8a7b5019 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_addn_to_add.cc @@ -20,10 +20,13 @@ namespace toco { // This pass will convert an AddN operator with only 2 inputs into a regular Add // operator, to which more optimizations may apply. -bool ConvertTrivialAddNToAdd::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertTrivialAddNToAdd::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto addn_it = model->operators.begin() + op_index; if (addn_it->get()->type != OperatorType::kAddN) { - return false; + return ::tensorflow::Status::OK(); } AddNOperator* addn_op = static_cast(addn_it->get()); CHECK_GE(addn_op->inputs.size(), 2); @@ -31,7 +34,7 @@ bool ConvertTrivialAddNToAdd::Run(Model* model, std::size_t op_index) { // We only reduce AddN with N=2 to a regular Add. if (addn_op->inputs.size() != 2) { - return false; + return ::tensorflow::Status::OK(); } // Copy inputs & outputs to regular Add. 
@@ -45,7 +48,8 @@ bool ConvertTrivialAddNToAdd::Run(Model* model, std::size_t op_index) { addn_it = add_it + 1; CHECK_EQ(addn_it->get(), addn_op); model->operators.erase(addn_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc index 75113a2a8c7c446bd13de8b5c1a8d8ef3cf7fdd6..78779243a9e15d7d25078dfd4d375947c06779e5 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_pack_to_reshape.cc @@ -25,27 +25,30 @@ limitations under the License. namespace toco { -bool ConvertTrivialPackToReshape::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertTrivialPackToReshape::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto pack_it = model->operators.begin() + op_index; if (pack_it->get()->type != OperatorType::kPack) { - return false; + return ::tensorflow::Status::OK(); } auto* pack_op = static_cast(pack_it->get()); if (pack_op->inputs.size() > 1) { // Not trivial. - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(pack_op->outputs.size(), 1); const auto& input_array = model->GetArray(pack_op->inputs[0]); if (!input_array.has_shape()) { // Yield until input dims have been resolved. - return false; + return ::tensorflow::Status::OK(); } if (input_array.shape().dimensions_count() == 0) { // Input array cannot be 0-D. // (Unsure if this is TF behavior, but was required to get a test to pass.) 
- return false; + return ::tensorflow::Status::OK(); } AddMessageF("Converting trivial %s to a reshape", LogName(*pack_op)); @@ -75,7 +78,8 @@ bool ConvertTrivialPackToReshape::Run(Model* model, std::size_t op_index) { CHECK_EQ(pack_it->get(), pack_op); model->operators.erase(pack_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc index b689be07926ecd9be4cc317735dc88eb90950e13..b6d712ca44c3e85a8b8a56a7d758d2ff65ca39fa 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_tile_to_concat.cc @@ -21,10 +21,13 @@ limitations under the License. namespace toco { -bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertTrivialTileToConcat::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto tile_it = model->operators.begin() + op_index; if (tile_it->get()->type != OperatorType::kTile) { - return false; + return ::tensorflow::Status::OK(); } auto* tile_op = static_cast(tile_it->get()); @@ -34,13 +37,13 @@ bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) { if (!input_array.has_shape() || !multiples_array.has_shape() || !output_array.has_shape()) { // Yield until PropagateFixedSizes has been run on this op. - return false; + return ::tensorflow::Status::OK(); } // Note: We can assume we have error checked inputs in PropagateFixedSizes. if (!multiples_array.buffer) { // Yield until the multiples is constant. 
- return false; + return ::tensorflow::Status::OK(); } std::vector const& multiples = multiples_array.GetBuffer().data; @@ -59,7 +62,7 @@ bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) { // The tile is non-trivial. Good luck. AddMessageF("Tile %s is non-trivial (has more than one multiply dimension)", LogName(*tile_op)); - return false; + return ::tensorflow::Status::OK(); } // The tile is like a concat. @@ -88,7 +91,8 @@ bool ConvertTrivialTileToConcat::Run(Model* model, std::size_t op_index) { CHECK_EQ(tile_it->get(), tile_op); model->operators.erase(tile_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc index 5a36a90b3841504d6f018832777e50bac95218d7..e5a96d433517616dc0f283a736bcfab3ebe71694 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc @@ -48,10 +48,13 @@ bool TransposeAffectsMemoryOrder(std::vector perm, } // namespace -bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ConvertTrivialTransposeToReshape::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto transpose_it = model->operators.begin() + op_index; if (transpose_it->get()->type != OperatorType::kTranspose) { - return false; + return ::tensorflow::Status::OK(); } TransposeOperator* transpose_op = static_cast(transpose_it->get()); @@ -60,14 +63,14 @@ bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) { const auto& output_array = model->GetArray(transpose_op->outputs[0]); if (!input_array.has_shape() || !output_array.has_shape()) { // Yield until PropagateFixedSizes has been 
run on this op. - return false; + return ::tensorflow::Status::OK(); } // Note: We can assume we have error checked inputs in PropagateFixedSizes. // Check that the permutation has propogated. std::vector const& perm = transpose_op->perm; if (perm.empty()) { - return false; + return ::tensorflow::Status::OK(); } // This transpose is trivial if non-unitary dimensions remain in the same @@ -76,7 +79,7 @@ bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) { std::vector const& output_dims = output_array.shape().dims(); if (TransposeAffectsMemoryOrder(perm, input_dims)) { - return false; + return ::tensorflow::Status::OK(); } // This transpose is trivial. Replace it with a Reshape op. @@ -109,7 +112,8 @@ bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) { CHECK_EQ(transpose_it->get(), transpose_op); model->operators.erase(transpose_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc b/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc index 1e68cd678bce6c27f1852a5ae0c13362d8938cdd..ebc0e9afca22d031a178ecb975f0ef722d7575ab 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/create_im2col_arrays.cc @@ -73,18 +73,22 @@ bool ProcessTransposeConvOperator(Model* model, TransposeConvOperator* op) { return true; } -bool CreateIm2colArrays::Run(Model* model, std::size_t op_index) { +::tensorflow::Status CreateIm2colArrays::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; auto* op = it->get(); switch (op->type) { case OperatorType::kConv: - return ProcessConvOperator(model, static_cast(op)); + *modified = ProcessConvOperator(model, static_cast(op)); + return ::tensorflow::Status::OK(); case 
OperatorType::kTransposeConv: - return ProcessTransposeConvOperator( + *modified = ProcessTransposeConvOperator( model, static_cast(op)); + return ::tensorflow::Status::OK(); default: - return false; + return ::tensorflow::Status::OK(); } } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc index 1688586733b0434c7fc98686a19f0ceb8092f33b..2119174950b1ade67c66a47e65b9808c216a7c54 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/dequantize.cc @@ -186,24 +186,27 @@ bool DequantizeArray(const string& array_name, } // namespace -bool Dequantize::Run(Model* model, std::size_t op_index) { +::tensorflow::Status Dequantize::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; const auto op_it = model->operators.begin() + op_index; auto* op = op_it->get(); if (op->type == OperatorType::kDequantize) { auto& input_array = model->GetArray(op->inputs[0]); if (input_array.data_type == ArrayDataType::kFloat) { - return false; + return ::tensorflow::Status::OK(); } if (input_array.final_data_type != ArrayDataType::kFloat) { - return false; + return ::tensorflow::Status::OK(); } input_array.data_type = ArrayDataType::kFloat; input_array.quantization_params = nullptr; auto& output_array = model->GetArray(op->outputs[0]); output_array.data_type = ArrayDataType::kFloat; output_array.quantization_params = nullptr; - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } std::vector arrays; @@ -220,7 +223,8 @@ bool Dequantize::Run(Model* model, std::size_t op_index) { } } - return changed; + *modified = changed; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc 
b/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc index 95558ef5ece9a78825daf0203e2f6f6fee6f3cda..1555cf60a1cdeabce949e4b3ab3823b6df1128e4 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/drop_fake_quant.cc @@ -25,21 +25,23 @@ limitations under the License. namespace toco { -bool DropFakeQuant::Run(Model* model, std::size_t op_index) { +::tensorflow::Status DropFakeQuant::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; const auto fakequant_it = model->operators.begin() + op_index; auto* fakequant_base_op = fakequant_it->get(); if (fakequant_base_op->type != OperatorType::kFakeQuant) { - return false; + return ::tensorflow::Status::OK(); } auto* fakequant_op = static_cast(fakequant_base_op); if (!fakequant_op->minmax) { - return false; + return ::tensorflow::Status::OK(); } const auto& output_array = model->GetArray(fakequant_op->outputs[0]); if (!output_array.minmax) { - return false; + return ::tensorflow::Status::OK(); } // Drop min/max inputs @@ -50,7 +52,8 @@ bool DropFakeQuant::Run(Model* model, std::size_t op_index) { } fakequant_op->inputs.resize(1); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc b/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc index f7fd878b7e8b1c834125130ea2a778cecefd3de0..7d66ea5dd234515fb315f59d1a80b9690b966493 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/drop_im2col_arrays.cc @@ -19,15 +19,17 @@ limitations under the License. 
namespace toco { -bool DropIm2colArrays::Run(Model* model, std::size_t op_index) { +::tensorflow::Status DropIm2colArrays::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; auto conv_it = model->operators.begin() + op_index; if (conv_it->get()->type != OperatorType::kConv) { - return false; + return ::tensorflow::Status::OK(); } auto* conv_op = static_cast(conv_it->get()); if (conv_op->outputs.size() < 2) { // Conv op does not have im2col. - return false; + return ::tensorflow::Status::OK(); } // Drop the im2col array. @@ -36,7 +38,8 @@ bool DropIm2colArrays::Run(Model* model, std::size_t op_index) { conv_op->outputs.resize(1); AddMessageF("Dropped an im2col array for %s", LogName(*conv_op)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc b/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc index e80ed036b311cfc586c40ece410ef6a6432a0cd9..72b1dda3be584b8c7f0168f8c845a1c74a487c2b 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/ensure_bias_vectors.cc @@ -62,17 +62,20 @@ bool ProcessLinearOperator(Model* model, Operator* op) { } } // namespace -bool EnsureBiasVectors::Run(Model* model, std::size_t op_index) { +::tensorflow::Status EnsureBiasVectors::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; auto* op = model->operators[op_index].get(); if (op->type == OperatorType::kConv || op->type == OperatorType::kDepthwiseConv || op->type == OperatorType::kFullyConnected) { if (ProcessLinearOperator(model, op)) { AddMessageF("Added bias vector to %s as %s", LogName(*op), op->inputs[2]); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } - return false; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git 
a/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc b/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc index c13fc0de7502a9edc80dc399354708a5b1b96b02..60dcd5268442fe08ed69ea0081ad3323a897f146 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/ensure_uint8_weights_safe_for_fast_int8_kernels.cc @@ -108,8 +108,9 @@ namespace toco { // we can foresee these 'fast int8 kernels' to remain important to have into // the 2020s. // -bool EnsureUint8WeightsSafeForFastInt8Kernels::Run(Model* model, - std::size_t op_index) { +::tensorflow::Status EnsureUint8WeightsSafeForFastInt8Kernels::Run( + Model* model, std::size_t op_index, bool* modified) { + *modified = false; const auto& op = *model->operators[op_index]; int weights_index = 0; switch (op.type) { @@ -148,16 +149,16 @@ bool EnsureUint8WeightsSafeForFastInt8Kernels::Run(Model* model, // That's why at the moment we only handle operators that use a GEMM // (Conv, fully-connected --- note that LSTM merely wraps a // fully-connected operator). 
- return false; + return ::tensorflow::Status::OK(); } const string& name = op.inputs[weights_index]; auto& array = model->GetArray(name); if (!array.buffer) { - return false; + return ::tensorflow::Status::OK(); } if (array.data_type != ArrayDataType::kUint8) { - return false; + return ::tensorflow::Status::OK(); } auto& buffer_data = array.GetMutableBuffer().data; @@ -212,7 +213,8 @@ bool EnsureUint8WeightsSafeForFastInt8Kernels::Run(Model* model, AddMessageF("Tweaked weights values for %s", LogName(op)); } - return changed; + *modified = changed; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc index c5ce3fcd95eb0aaf63dcc7f43b96d8a13ed93929..88511a7d3c42582c20a049eac9974dea10810170 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_activation_functions.cc @@ -25,27 +25,30 @@ limitations under the License. 
namespace toco { -bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) { +::tensorflow::Status FuseActivationFunctions::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto ac_it = model->operators.begin() + op_index; const auto* ac_op = ac_it->get(); if (ac_op->type != OperatorType::kRelu6 && ac_op->type != OperatorType::kRelu1 && ac_op->type != OperatorType::kRelu) { - return false; + return ::tensorflow::Status::OK(); } // Find the op producing the array passed to this activation function Operator* op = GetOpWithOutput(*model, ac_op->inputs[0]); - if (!op) return false; + if (!op) return ::tensorflow::Status::OK(); if (CountTrueOutputs(*model, *op) > 1) { AddMessageF( "Not fusing activation function %s into %s because it has more than " "one consumed output", LogName(*ac_op), LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(op->outputs[0], ac_op->inputs[0]); @@ -57,7 +60,7 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) { "Not fusing activation function into %s because it is consumed by more " "than 1 other operator", LogName(*ac_op), LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } if (!IsDiscardableArray(*model, op->outputs[0])) { @@ -65,7 +68,7 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) { "Not fusing activation function %s into %s because output %s it is not " "discardable", LogName(*ac_op), LogName(*op), op->outputs[0]); - return false; + return ::tensorflow::Status::OK(); } if (op->fused_activation_function != FusedActivationFunctionType::kNone) { @@ -73,7 +76,7 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) { "Not fusing activation function %s into %s because it already has a " "fused activation function", LogName(*ac_op), LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } if (!OperatorSupportsFusedActivation(op->type)) { @@ -81,7 +84,7 @@ bool 
FuseActivationFunctions::Run(Model* model, std::size_t op_index) { "Not fusing activation function %s because the %s op doesn't support " "it", LogName(*ac_op), LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Fusing activation function %s into the preceding %s", @@ -98,7 +101,8 @@ bool FuseActivationFunctions::Run(Model* model, std::size_t op_index) { model->EraseArray(ac_op->inputs[0]); op->outputs[0] = ac_op->outputs[0]; model->operators.erase(ac_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc index dcbbead517f26a227363989b5af2a4040c98ff57..0de22b8ff4276c4a68da979e7de74b4751b6267f 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_following_affine.cc @@ -150,14 +150,17 @@ void FuseMulOrDivParamsIntoFollowingAffine(Model* model, Operator* following_op, } // namespace -bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { +::tensorflow::Status FuseBinaryIntoFollowingAffine::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto binary_it = model->operators.begin() + op_index; auto* binary_op = binary_it->get(); if (binary_op->type != OperatorType::kAdd && binary_op->type != OperatorType::kMul && binary_op->type != OperatorType::kSub && binary_op->type != OperatorType::kDiv) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(binary_op->inputs.size(), 2); @@ -175,12 +178,12 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { }; if (!is_input_constant[0] && !is_input_constant[1]) { // Neither input is constant, so nothing we can fuse into a constant. 
- return false; + return ::tensorflow::Status::OK(); } if (is_input_constant[0] && is_input_constant[1]) { // Both inputs are constants. That's a job for constants // propagation, not for us to handle here. - return false; + return ::tensorflow::Status::OK(); } const int index_of_constant_input = is_input_constant[0] ? 0 : 1; const int index_of_variable_input = is_input_constant[0] ? 1 : 0; @@ -192,7 +195,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { if (index_of_constant_input != 1) { AddMessageF("Not fusing %s because the denominator is not constant", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } } @@ -204,7 +207,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s into the following affine op, because we only know " "how to do so when the constant operand is a scalar", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } } @@ -212,7 +215,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { FusedActivationFunctionType::kNone) { AddMessageF("Not fusing %s because it has a fused activation function", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } Operator* following_op = GetOpWithInput(*model, binary_op->outputs[0]); @@ -221,7 +224,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { AddMessageF( "Not fusing %s because it is not consumed by exactly one other op", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } if (following_op->type != OperatorType::kConv && @@ -231,14 +234,14 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s because the following %s is not of one of the supported " "types", LogName(*binary_op), LogName(*following_op)); - return false; + return ::tensorflow::Status::OK(); } if (following_op->inputs.size() < 3) { AddMessageF( "Not fusing %s because the 
following %s does not have a bias vector", LogName(*following_op), LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } const auto& weights = model->GetArray(following_op->inputs[1]); @@ -248,7 +251,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s because the following %s has non-constant weights or " "bias arrays", LogName(*binary_op), LogName(*following_op)); - return false; + return ::tensorflow::Status::OK(); } // Try to fuse the binary params into the following op's params @@ -260,7 +263,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { AddMessageF( "Not fusing %s because the following %s does not use VALID padding", LogName(*binary_op), LogName(*following_op)); - return false; + return ::tensorflow::Status::OK(); } } if (following_op->type == OperatorType::kDepthwiseConv) { @@ -269,7 +272,7 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { AddMessageF( "Not fusing %s because the following %s does not use VALID padding", LogName(*binary_op), LogName(*following_op)); - return false; + return ::tensorflow::Status::OK(); } } FuseAddOrSubParamsIntoFollowingAffine(model, following_op, binary_op, @@ -294,7 +297,8 @@ bool FuseBinaryIntoFollowingAffine::Run(Model* model, std::size_t op_index) { model->EraseArray(old_constant_param_name); } model->operators.erase(binary_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc index b324631579f9ba6d68db034b62727ec1e17e9a76..b8da756d857355b8cf90838375bd879df2c06755 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_binary_into_preceding_affine.cc 
@@ -188,14 +188,17 @@ void FuseMulOrDivParamsIntoPrecedingAffine(Model* model, Operator* preceding_op, } } // namespace -bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { +::tensorflow::Status FuseBinaryIntoPrecedingAffine::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto binary_it = model->operators.begin() + op_index; const auto* binary_op = binary_it->get(); if (binary_op->type != OperatorType::kAdd && binary_op->type != OperatorType::kMul && binary_op->type != OperatorType::kSub && binary_op->type != OperatorType::kDiv) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(binary_op->inputs.size(), 2); @@ -213,12 +216,12 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { }; if (!is_input_constant[0] && !is_input_constant[1]) { // Neither input is constant, so nothing we can fuse into a constant. - return false; + return ::tensorflow::Status::OK(); } if (is_input_constant[0] && is_input_constant[1]) { // Both inputs are constants. That's a job for constants // propagation, not for us to handle here. - return false; + return ::tensorflow::Status::OK(); } const int index_of_constant_input = is_input_constant[0] ? 0 : 1; const int index_of_variable_input = is_input_constant[0] ? 
1 : 0; @@ -230,7 +233,7 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { if (index_of_constant_input != 1) { AddMessageF("Not fusing %s because the denominator is not constant", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } } @@ -239,12 +242,12 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { if (!preceding_op) { AddMessageF("Not fusing %s because it is not the output of another op", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } for (const string& output_array : model->flags.output_arrays()) { if (preceding_op->outputs[0] == output_array) { - return false; + return ::tensorflow::Status::OK(); } } @@ -255,7 +258,7 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s because the preceding %s is not of one of the supported " "types", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } if (preceding_op->fused_activation_function != @@ -264,14 +267,14 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s because the preceding %s has a fused activation " "function", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } if (preceding_op->inputs.size() < 3) { AddMessageF( "Not fusing %s because the preceding %s does not have a bias vector", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } const auto& weights_name = preceding_op->inputs[1]; @@ -289,14 +292,14 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s because the preceding %s has a non-constant bias " "array", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } if (count_ops_consuming_bias > 1) { AddMessageF( "Not fusing %s because the bias of the preceding %s is consumed 
by " "another op", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } } else { if (!weights.buffer || !bias.buffer) { @@ -304,14 +307,14 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s because the preceding %s has non-constant weights or " "bias arrays", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } if (count_ops_consuming_weights > 1 || count_ops_consuming_bias > 1) { AddMessageF( "Not fusing %s because the weights or bias of the preceding %s is " "consumed by another op", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } } @@ -323,7 +326,7 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { "Not fusing %s because the output of the preceding %s is consumed by " "another op", LogName(*binary_op), LogName(*preceding_op)); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Fusing %s into the preceding %s", LogName(*binary_op), @@ -352,7 +355,8 @@ bool FuseBinaryIntoPrecedingAffine::Run(Model* model, std::size_t op_index) { model->EraseArray(old_constant_param_name); } model->operators.erase(binary_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc index 874d8def571fbce4219de15285c8df6fd2487a9a..4848867b9a0a73a27a25b19e406d81256716ae7d 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/fuse_broadcast_into_following_binary.cc @@ -51,19 +51,22 @@ bool IsBroadcastingOp(const Model& model, Operator* op) { // Finds an operation that looks like a broadcast (concat of the same 
sources // along the last dimension) and drops it by relying on the ability of certain // binary ops to perform an implicit broadcast. -bool FuseBroadcastIntoFollowingBinary::Run(Model* model, std::size_t op_index) { +::tensorflow::Status FuseBroadcastIntoFollowingBinary::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto binary_it = model->operators.begin() + op_index; auto* binary_op = binary_it->get(); // Test for binary ops of types that we know how to resolve if (binary_op->inputs.size() != 2) { - return false; + return ::tensorflow::Status::OK(); } if (binary_op->type != OperatorType::kAdd && binary_op->type != OperatorType::kMul && binary_op->type != OperatorType::kSub && binary_op->type != OperatorType::kDiv) { - return false; + return ::tensorflow::Status::OK(); } // NOTE: either of these ops may be nullptr if the input array is constant. @@ -78,14 +81,14 @@ bool FuseBroadcastIntoFollowingBinary::Run(Model* model, std::size_t op_index) { if (!is_op_0_broadcast && !is_op_1_broadcast) { // Neither input is a broadcast-looking thing. AddMessageF("Neither input looks broadcasty"); - return false; + return ::tensorflow::Status::OK(); } else if (is_op_0_broadcast && is_op_1_broadcast) { AddMessageF( "Unable to fuse broadcast into %s as both inputs (%s, %s) are " "broadcasts", LogName(*binary_op), op[0] ? LogName(*op[0]) : "(?)", op[1] ? LogName(*op[1]) : "(?)"); - return false; + return ::tensorflow::Status::OK(); } int broadcast_index = is_op_0_broadcast ? 0 : 1; @@ -96,7 +99,8 @@ bool FuseBroadcastIntoFollowingBinary::Run(Model* model, std::size_t op_index) { binary_op->inputs[broadcast_index] = op[broadcast_index]->inputs[0]; // We leave the broadcast op in; it'll get cleaned up if it's not used later. 
- return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc index 6961e23690a5e53643f2b2c52bb62ce395d05c95..8b0bc2d865ea49140a112f75140005ee51eb2994 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.cc @@ -142,7 +142,7 @@ bool GraphTransformationsPass(int increment, Model* model, for (const auto& transformation : transformations) { CHECK(!changed_now); CHECK(transformation->Messages().empty()); - changed_now = transformation->Run(model, op_index); + CHECK(transformation->Run(model, op_index, &changed_now).ok()); const char* made_a_change_msg = changed_now ? "made a change" : "did NOT make a change"; const int log_level = diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h index 4d213b3f9cb930007096dbdd06b1981e9bab2c32..a89db320ea9d843485e112d28c57e650a8fe288b 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h @@ -27,7 +27,8 @@ namespace toco { class GraphTransformation { public: - virtual bool Run(Model* model, std::size_t op_index) = 0; + virtual ::tensorflow::Status Run(Model* model, std::size_t op_index, + bool* modified) = 0; virtual const char* Name() const = 0; virtual ~GraphTransformation() {} // Returns the list of messages that this graph transformation @@ -104,11 +105,12 @@ class GraphTransformationsSet { void RunGraphTransformations(Model* model, const string& message, const GraphTransformationsSet& transformations); -#define DECLARE_GRAPH_TRANSFORMATION(GTName) \ - class GTName : public GraphTransformation { \ - 
public: \ - bool Run(Model* model, std::size_t op_index) override; \ - const char* Name() const override { return #GTName; } \ +#define DECLARE_GRAPH_TRANSFORMATION(GTName) \ + class GTName : public GraphTransformation { \ + public: \ + ::tensorflow::Status Run(Model* model, std::size_t op_index, \ + bool* modified) override; \ + const char* Name() const override { return #GTName; } \ }; // List of all graph transformations @@ -200,7 +202,8 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveGatherAttributes) class PropagateDefaultMinMax : public GraphTransformation { public: - bool Run(Model* model, std::size_t op_index) override; + ::tensorflow::Status Run(Model* model, std::size_t op_index, + bool* modified) override; const char* Name() const override { return "PropagateDefaultMinMax"; } bool has_any_ranges_defined() const { return !type_ranges_.empty(); } @@ -218,7 +221,8 @@ class PropagateDefaultMinMax : public GraphTransformation { class RemoveTrivialReshape : public GraphTransformation { public: - bool Run(Model* model, std::size_t op_index) override; + ::tensorflow::Status Run(Model* model, std::size_t op_index, + bool* modified) override; const char* Name() const override { return "RemoveTrivialReshape"; } bool treat_expand_dims_as_trivial() const { return treat_expand_dims_as_trivial_; @@ -233,7 +237,8 @@ class RemoveTrivialReshape : public GraphTransformation { class ResolveConstantFakeQuant : public GraphTransformation { public: - bool Run(Model* model, std::size_t op_index) override; + ::tensorflow::Status Run(Model* model, std::size_t op_index, + bool* modified) override; const char* Name() const override { return "ResolveConstantFakeQuant"; } // True if the num_bits should adjust the final data type. 
@@ -250,7 +255,8 @@ class ResolveConstantFakeQuant : public GraphTransformation { class EnsureUint8WeightsSafeForFastInt8Kernels : public GraphTransformation { public: - bool Run(Model* model, std::size_t op_index) override; + ::tensorflow::Status Run(Model* model, std::size_t op_index, + bool* modified) override; const char* Name() const override { return "EnsureUint8WeightsSafeForFastInt8Kernels"; } @@ -267,7 +273,8 @@ class EnsureUint8WeightsSafeForFastInt8Kernels : public GraphTransformation { class IdentifyDilatedConv : public GraphTransformation { public: - bool Run(Model* model, std::size_t op_index) override; + ::tensorflow::Status Run(Model* model, std::size_t op_index, + bool* modified) override; const char* Name() const override { return "IdentifyDilatedConv"; } bool identify_depthwise_conv() const { return identify_depthwise_conv_; } void set_identify_depthwise_conv(bool val) { identify_depthwise_conv_ = val; } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc index 3114fa93e8e2741e2d288d165085d677a8d2a96d..72df53548ba7b05424a7f2f4d7866e2d774cad3f 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc @@ -372,7 +372,9 @@ bool HardcodeMinMaxForLstmCell(Model* model, Operator* op) { } } // namespace -bool HardcodeMinMax::Run(Model* model, std::size_t op_index) { +::tensorflow::Status HardcodeMinMax::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; auto* op = it->get(); bool changed = false; @@ -467,7 +469,8 @@ bool HardcodeMinMax::Run(Model* model, std::size_t op_index) { if (changed) { AddMessageF("Hardcoded min-max through %s", LogName(*op)); } - return changed; + *modified = changed; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git 
a/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc index aac77eb39e4b0650b699d3160f5bbe54aff8cdde..9e4a3005a1d5347911c3967fe373bf009ecff3e8 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_dilated_conv.cc @@ -168,7 +168,10 @@ bool ResolveDilatedConv(Model* model, Operator* conv_base_op, Operator* stb_op, return true; } -bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) { +::tensorflow::Status IdentifyDilatedConv::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; auto* stb_op = it->get(); @@ -176,17 +179,17 @@ bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) { // *************************************************************************** // SpaceToBatch Op. if (stb_op->type != OperatorType::kSpaceToBatchND) { - return false; + return ::tensorflow::Status::OK(); } if (stb_op->inputs.size() != 3) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(stb_op->outputs.size(), 1); // Extract the dilation factor from Input[1] of SpaceToBatch // TODO(mjmatthews): Support 2D dilation factors. 
const auto& block_shape_array = model->GetArray(stb_op->inputs[1]); if (!block_shape_array.buffer) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(block_shape_array.shape().dimensions_count(), 1); int dilation_factor = @@ -195,7 +198,7 @@ bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) { // Expand Op auto* post_stb_op = GetOpWithInput(*model, stb_op->outputs[0]); if (!post_stb_op) { - return false; + return ::tensorflow::Status::OK(); } bool has_expand_op = false; if (post_stb_op->type == OperatorType::kExpandDims) { @@ -229,7 +232,8 @@ bool IdentifyDilatedConv::Run(Model* model, std::size_t op_index) { } } - return changed; + *modified = changed; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc index b78efd7fc3602dc2d6e03fd28d694c344b61c17c..78f60f52fbdbc74e5ec874782a16c0973e3f7b41 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_normalization.cc @@ -39,7 +39,10 @@ std::vector>::iterator FindOperator( } } // namespace -bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { +::tensorflow::Status IdentifyL2Normalization::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto div_it = model->operators.begin() + op_index; const auto* div_or_mul_op = div_it->get(); OperatorType expected_op_type_producing_div_or_mul_input; @@ -48,7 +51,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { } else if (div_or_mul_op->type == OperatorType::kMul) { expected_op_type_producing_div_or_mul_input = OperatorType::kRsqrt; } else { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(div_or_mul_op->inputs.size(), 2); Operator* op_producing_div_or_mul_input[2] = { @@ 
-58,14 +61,14 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { if (!op_producing_div_or_mul_input[1] || op_producing_div_or_mul_input[1]->type != expected_op_type_producing_div_or_mul_input) { - return false; + return ::tensorflow::Status::OK(); } Operator* sqrt_or_rsqrt_op = op_producing_div_or_mul_input[1]; CHECK_EQ(sqrt_or_rsqrt_op->inputs.size(), 1); Operator* op_producing_sqrt_or_rsqrt_input = GetOpWithOutput(*model, sqrt_or_rsqrt_op->inputs[0]); if (!op_producing_sqrt_or_rsqrt_input) { - return false; + return ::tensorflow::Status::OK(); } // There may be an Add or a Maximum here, adding or clamping to a "small" @@ -105,7 +108,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { " because the operator producing the input to the square root, %s," ", does not match the expected pattern", LogName(*op_producing_sqrt_or_rsqrt_input)); - return false; + return ::tensorflow::Status::OK(); } } @@ -116,7 +119,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { "Giving up trying to identify L2Normalization subgraph: " "expected Sum op, got %s", LogName(*sum_op)); - return false; + return ::tensorflow::Status::OK(); } Operator* square_op = GetOpWithOutput(*model, sum_op->inputs[0]); @@ -125,7 +128,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { "Giving up trying to identify L2Normalization subgraph: " "expected Square op, got %s", LogName(*square_op)); - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(square_op->inputs.size(), 1); @@ -135,7 +138,7 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { "Giving up trying to identify L2Normalization subgraph: %s does not " "take the same input as the Mul/Div node", LogName(*square_op)); - return false; + return ::tensorflow::Status::OK(); } // Create and emplace the new L2Normalization @@ -162,7 +165,8 @@ bool IdentifyL2Normalization::Run(Model* model, std::size_t op_index) { 
model->operators.erase(FindOperator(model, sqrt_or_rsqrt_op)); model->EraseArray(div_or_mul_op->inputs[1]); model->operators.erase(FindOperator(model, div_or_mul_op)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc index 705e73779b7f74698149d5e9e56f69a371326ceb..13664bb344def96a2780d230143c291a7600ecb7 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_l2_pool.cc @@ -38,11 +38,13 @@ std::vector>::iterator FindOperator( } } // namespace -bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) { +::tensorflow::Status IdentifyL2Pool::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; const auto sqrt_it = model->operators.begin() + op_index; const auto* sqrt_op = sqrt_it->get(); if (sqrt_op->type != OperatorType::kSqrt) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(sqrt_op->inputs.size(), 1); @@ -56,7 +58,7 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) { AddMessageF( "Giving up trying to identify L2Pool subgraph: " "expected AveragePool op, but Sqrt op has no preceding op"); - return false; + return ::tensorflow::Status::OK(); } if (prev_to_sqrt_op->type != OperatorType::kAveragePool) { @@ -64,7 +66,7 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) { "Giving up trying to identify L2Pool subgraph: " "expected AveragePool op, got %s", LogName(*prev_to_sqrt_op)); - return false; + return ::tensorflow::Status::OK(); } avpool_op = static_cast(prev_to_sqrt_op); @@ -77,7 +79,7 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) { "Giving up trying to identify L2Pool subgraph: " "expected Square op, got %s", LogName(*square_op)); - return false; + return 
::tensorflow::Status::OK(); } // Create and emplace L2Pool node. @@ -107,7 +109,8 @@ bool IdentifyL2Pool::Run(Model* model, std::size_t op_index) { model->operators.erase(FindOperator(model, avpool_op)); model->operators.erase(FindOperator(model, sqrt_op)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc index c0b014b45eb1df25173ce3ca3fa488b0655c3c76..7fd8f906e2c270dd356dd838c07f3e560ec7143a 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm.cc @@ -132,7 +132,9 @@ bool MatchOperatorInputs(const Operator& op, const Model& model, } // namespace -bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { +::tensorflow::Status IdentifyLstmCell::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; // This LSTM cell identification method is not invariant to commutation of // commutative operator inputs. 
For example, if input[0] and input[1] of the // final output multiplication were swapped, this method would not identify it @@ -143,13 +145,13 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { auto op_it = model->operators.begin() + op_index; Operator* final_output_mul = op_it->get(); if (final_output_mul->type != OperatorType::kMul) { - return false; + return ::tensorflow::Status::OK(); } Operator *state_output_tanh, *fc_output_sig; if (!MatchOperatorInputs(*final_output_mul, *model, OperatorType::kTanh, &state_output_tanh, OperatorType::kLogistic, &fc_output_sig)) { - return false; + return ::tensorflow::Status::OK(); } // State output TanH @@ -158,7 +160,7 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { Operator* state_combine_add; if (!MatchOperatorInputs(*state_output_tanh, *model, OperatorType::kAdd, &state_combine_add)) { - return false; + return ::tensorflow::Status::OK(); } // State forget & remember addition @@ -166,7 +168,7 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { if (!MatchOperatorInputs(*state_combine_add, *model, OperatorType::kMul, &state_forget_mul, OperatorType::kMul, &state_remember_mul)) { - return false; + return ::tensorflow::Status::OK(); } const string prev_state = state_forget_mul->inputs[0]; @@ -175,7 +177,7 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { if (!MatchOperatorInputs(*state_forget_mul, *model, OperatorType::kNone, nullptr, OperatorType::kLogistic, &state_forget_sig)) { - return false; + return ::tensorflow::Status::OK(); } // State remember gate @@ -183,40 +185,40 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { if (!MatchOperatorInputs(*state_remember_mul, *model, OperatorType::kLogistic, &state_remember_sig, OperatorType::kTanh, &state_info_tanh)) { - return false; + return ::tensorflow::Status::OK(); } // State remember "information" activation function Operator* fc_output_split; if (!MatchOperatorInputs(*state_info_tanh, 
*model, OperatorType::kSplit, &fc_output_split)) { - return false; + return ::tensorflow::Status::OK(); } // State remember gate activation function Operator* tmp; if (!MatchOperatorInputs(*state_remember_sig, *model, OperatorType::kSplit, &tmp) || (tmp != fc_output_split)) { - return false; + return ::tensorflow::Status::OK(); } // State forget gate activation function if (!MatchOperatorInputs(*state_forget_sig, *model, OperatorType::kSplit, &tmp) || (tmp != fc_output_split)) { - return false; + return ::tensorflow::Status::OK(); } // Fully connected output activation function if (!MatchOperatorInputs(*fc_output_sig, *model, OperatorType::kSplit, &tmp) || (tmp != fc_output_split)) { - return false; + return ::tensorflow::Status::OK(); } // Fully connected output split Operator* fully_connected; if (!MatchOperatorInputs(*fc_output_split, *model, OperatorType::kNone, nullptr, OperatorType::kFullyConnected, &fully_connected)) { - return false; + return ::tensorflow::Status::OK(); } // Fully connected op @@ -225,13 +227,13 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { OperatorType::kConcatenation, &concat_inputs, OperatorType::kNone, nullptr, OperatorType::kNone, nullptr)) { - return false; + return ::tensorflow::Status::OK(); } if (static_cast(fully_connected)->weights_format != FullyConnectedWeightsFormat::kDefault) { // Not yet implemented: experimental shuffled weights in fused LSTM cell. 
- return false; + return ::tensorflow::Status::OK(); } // Emplace a new LSTM cell operator @@ -300,7 +302,8 @@ bool IdentifyLstmCell::Run(Model* model, std::size_t op_index) { model->operators.erase(FindOperator(model, *fully_connected)); DeleteArrayIfUnused(concat_inputs->outputs[0], model); model->operators.erase(FindOperator(model, *concat_inputs)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc index 5b6a984ee143a6007471b165510030cd3ad3f73c..6ccce923f361d76d208aa76c295c4ed5aea047d5 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_merge_inputs.cc @@ -25,19 +25,22 @@ limitations under the License. namespace toco { -bool MergeLstmCellInputs::Run(Model* model, std::size_t op_index) { +::tensorflow::Status MergeLstmCellInputs::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; // Find lstm cell. auto op_it = model->operators.begin() + op_index; auto src_op = op_it->get(); if (src_op->type != OperatorType::kLstmCell) { - return false; + return ::tensorflow::Status::OK(); } // Already a compact LstmCell. Do not need to merge cell inputs. 
const auto* src_lstm_op = static_cast(src_op); if (src_lstm_op->kernel_type != LstmCellOperator::KERNEL_FULL || src_lstm_op->inputs.size() != kExtendedLstmInputCount) { - return false; + return ::tensorflow::Status::OK(); } // Identify prev_activ_input, prev_state_input as required Op inputs, @@ -45,12 +48,12 @@ bool MergeLstmCellInputs::Run(Model* model, std::size_t op_index) { string prev_activ_input; if (!GetMatchingRnnArray(model, src_op->outputs[kOutputTensor], &prev_activ_input)) { - return false; + return ::tensorflow::Status::OK(); } string prev_state_input; if (!GetMatchingRnnArray(model, src_op->outputs[kCellStateTensor], &prev_state_input)) { - return false; + return ::tensorflow::Status::OK(); } // Get LstmCell's cell, input, output size. @@ -184,7 +187,8 @@ bool MergeLstmCellInputs::Run(Model* model, std::size_t op_index) { DeleteArrayIfUnused(src_op->inputs[kOutputGateBiasTensor], model); model->operators.erase(FindOp(*model, src_op)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc index 46d1fce50e5d6e2a74cf5461d731e46469dde5bf..ad5120e2aa5f4ea31607f0e8fdf840664fc66a99 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_lstm_split_inputs.cc @@ -25,19 +25,22 @@ limitations under the License. namespace toco { -bool SplitLstmCellInputs::Run(Model* model, std::size_t op_index) { +::tensorflow::Status SplitLstmCellInputs::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; // Find lstm cell. 
auto op_it = model->operators.begin() + op_index; auto curr_op = op_it->get(); if (curr_op->type != OperatorType::kLstmCell) { - return false; + return ::tensorflow::Status::OK(); } const auto* curr_lstm_op = static_cast(curr_op); // Already an extended LstmCell. Do not need to split cell inputs. if (curr_lstm_op->kernel_type != LstmCellOperator::KERNEL_BASIC || curr_lstm_op->inputs.size() != LstmCellOperator::NUM_INPUTS) { - return false; + return ::tensorflow::Status::OK(); } // Make sure the WEIGHTS_INPUT and BIASES_INPUT are constant arrays, @@ -46,13 +49,13 @@ bool SplitLstmCellInputs::Run(Model* model, std::size_t op_index) { *model, curr_op->inputs[LstmCellOperator::WEIGHTS_INPUT]) || !IsConstantParameterArray( *model, curr_op->inputs[LstmCellOperator::BIASES_INPUT])) { - return false; + return ::tensorflow::Status::OK(); } // Make sure propagate_fixed_sizes has defined the size of the output. if (!model->GetArray(curr_op->outputs[LstmCellOperator::ACTIV_OUTPUT]) .has_shape()) { - return false; + return ::tensorflow::Status::OK(); } // Emplace a new LstmCell operator with extended inputs (kernel/lstm.cc). @@ -168,7 +171,8 @@ bool SplitLstmCellInputs::Run(Model* model, std::size_t op_index) { DeleteArrayIfUnused(curr_op->inputs[LstmCellOperator::BIASES_INPUT], model); model->operators.erase(FindOp(*model, curr_op)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc index b90a156a0dcfcd77c3e2b47bb0d77e246f2fc625..c11fee4dc94041b3608c18378df67a08443b513a 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_prelu.cc @@ -43,13 +43,15 @@ limitations under the License. 
namespace toco { -bool IdentifyPRelu::Run(Model* model, std::size_t op_index) { +::tensorflow::Status IdentifyPRelu::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; const auto add_op_it = model->operators.begin() + op_index; const auto* add_op = add_op_it->get(); if (add_op == nullptr || add_op->type != OperatorType::kAdd || add_op->inputs.size() != 2 || add_op->fused_activation_function != FusedActivationFunctionType::kNone) { - return false; + return ::tensorflow::Status::OK(); } const auto* relu_input_op = GetOpWithOutput(*model, add_op->inputs[0]); @@ -57,7 +59,7 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) { relu_input_op->inputs.size() != 1 || relu_input_op->fused_activation_function != FusedActivationFunctionType::kNone) { - return false; + return ::tensorflow::Status::OK(); } // TODO(ycling): Both Add and Mul are commutative. Support the case where @@ -66,7 +68,7 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) { if (mul_op == nullptr || mul_op->type != OperatorType::kMul || mul_op->inputs.size() != 2 || mul_op->fused_activation_function != FusedActivationFunctionType::kNone) { - return false; + return ::tensorflow::Status::OK(); } const auto neg_alpha_tensor_name = mul_op->inputs[0]; @@ -75,7 +77,7 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) { if (relu_neg_input_op == nullptr || relu_neg_input_op->inputs.size() != 1) { - return false; + return ::tensorflow::Status::OK(); } const Operator* final_input_op; @@ -92,13 +94,13 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) { relu_neg_input_op->type != OperatorType::kRelu || relu_neg_input_op->fused_activation_function != FusedActivationFunctionType::kNone) { - return false; + return ::tensorflow::Status::OK(); } final_input_op = neg_input_op; } if (relu_input_op->inputs[0] != final_input_op->inputs[0]) { - return false; + return ::tensorflow::Status::OK(); } const auto input_tensor_name = relu_input_op->inputs[0]; 
@@ -128,7 +130,8 @@ bool IdentifyPRelu::Run(Model* model, std::size_t op_index) { // intermediate tensors aren't used by other ops, those will be removed by // other graph transformation rules. model->operators.erase(FindOp(*model, add_op)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc b/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc index 94820a016622a12654e91967737e05fc91ed404c..51d0629362edbe1183f20e32b16437f07e1281c7 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/identify_relu1.cc @@ -56,13 +56,15 @@ int GetSingleScalarInputIndexOfBinaryOp(Model* model, const Operator* op, } } // namespace -bool IdentifyRelu1::Run(Model* model, std::size_t op_index) { +::tensorflow::Status IdentifyRelu1::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; // Follow sequences of min+max and max+min. First get the leading op. const auto op_it = model->operators.begin() + op_index; const auto* op_0 = op_it->get(); if (op_0->type != OperatorType::kMinimum && op_0->type != OperatorType::kMaximum) { - return false; + return ::tensorflow::Status::OK(); } // Get the paired op and ensure it's the counter to the first. @@ -71,17 +73,17 @@ bool IdentifyRelu1::Run(Model* model, std::size_t op_index) { (op_1->type != OperatorType::kMinimum && op_1->type != OperatorType::kMaximum) || op_0->type == op_1->type) { - return false; + return ::tensorflow::Status::OK(); } const auto* min_op = op_0->type == OperatorType::kMinimum ? op_0 : op_1; const auto* max_op = op_0->type == OperatorType::kMaximum ? 
op_0 : op_1; if (min_op->inputs.size() != 2 || max_op->inputs.size() != 2) { - return false; + return ::tensorflow::Status::OK(); } if (min_op->outputs.size() != 1 || max_op->outputs.size() != 1) { - return false; + return ::tensorflow::Status::OK(); } // Get the original input to the min+max pair. @@ -90,7 +92,7 @@ bool IdentifyRelu1::Run(Model* model, std::size_t op_index) { int max_scalar_input_index = GetSingleScalarInputIndexOfBinaryOp(model, max_op, -1.0f); if (min_scalar_input_index == -1 || max_scalar_input_index == -1) { - return false; + return ::tensorflow::Status::OK(); } int op_0_scalar_input_index = op_0 == min_op ? min_scalar_input_index : max_scalar_input_index; @@ -111,7 +113,8 @@ bool IdentifyRelu1::Run(Model* model, std::size_t op_index) { model->operators.erase(FindOperator(model, op_0)); model->operators.erase(FindOperator(model, op_1)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc b/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc index f684de08abf72d05d4408bf6341fa5a3c2ed11cd..5bf17d5b4cd1a02a2848bf1da2be2e9cfb8b314c 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc @@ -97,7 +97,10 @@ bool AddDequantizeOperatorToInput(const string& input_name, const Operator* op, return true; } -bool MakeInitialDequantizeOperator::Run(Model* model, std::size_t op_index) { +::tensorflow::Status MakeInitialDequantizeOperator::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; // This is effectively a transformation applied to edges. We iterate over the // specified node (op) and proceed for input edges. 
const auto it = model->operators.begin() + op_index; @@ -114,7 +117,8 @@ bool MakeInitialDequantizeOperator::Run(Model* model, std::size_t op_index) { } } } - return change_made; + *modified = change_made; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc index 95bc7f7d4b8b517c1cc5a73b3e85bbd985ce460f..06de9b1cd89571b693b84189883350f5b2c176c5 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/merge_reshape_into_preceding_transpose.cc @@ -102,18 +102,19 @@ std::vector ReshapeToTranspose(const Model& model, // to be merged if the reshape does not affect memory ordering and does not // affects the number of dimensions. This only occurs when only unary dimensions // are shifting position. -bool MergeReshapeIntoPrecedingTranspose::Run(Model* model, - std::size_t op_index) { +::tensorflow::Status MergeReshapeIntoPrecedingTranspose::Run( + Model* model, std::size_t op_index, bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; auto* reshape_op = ConvertOperator( it->get(), OperatorType::kReshape); if (reshape_op == nullptr) { - return false; + return ::tensorflow::Status::OK(); } if (!OperatorReady(*model, reshape_op) || reshape_op->shape.empty()) { - return false; + return ::tensorflow::Status::OK(); } const string intermediate_name = reshape_op->inputs[0]; @@ -121,13 +122,13 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model, // Guarantee the input is only consume by the reshape. if (CountOpsWithInput(*model, intermediate_name) != 1) { - return false; + return ::tensorflow::Status::OK(); } // Check for the parent operator. 
const auto& transpose_it = FindOpWithOutput(*model, intermediate_name); if (transpose_it == model->operators.end()) { - return false; + return ::tensorflow::Status::OK(); } // Find the parent operator and guarantee it is a transpose. @@ -135,16 +136,16 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model, transpose_it->get(), OperatorType::kTranspose); if (transpose_op == nullptr) { - return false; + return ::tensorflow::Status::OK(); } if (!OperatorReady(*model, transpose_op) || transpose_op->perm.empty()) { - return false; + return ::tensorflow::Status::OK(); } if (!ReshapeIsEquivalentToTranspose(*model, reshape_op, false /*allow_extra_unary_dimensions*/)) { - return false; + return ::tensorflow::Status::OK(); } // Check that the intermediate is not an output array. @@ -153,7 +154,7 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model, "Cannot fuse %s and %s as it would invalidate the transpose " "output array.", LogName(*transpose_op), LogName(*reshape_op)); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Merging operations %s and %s", LogName(*transpose_op), @@ -172,7 +173,7 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model, // Remove the reshape as passthrough operation. if (!RemoveTrivialPassthroughOp(this, model, op_index)) { - return false; + return ::tensorflow::Status::OK(); } // Update transpose_op's constant buffer to contain the new permutation. @@ -184,7 +185,8 @@ bool MergeReshapeIntoPrecedingTranspose::Run(Model* model, // transpose_ops's shape will likely has changed. 
model->GetArray(transpose_op->outputs[0]).clear_shape(); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc index 7f44c65285bdef6ba314b16122fdd550bfa47e6a..f0d8d924adbd34cb3778c9d1a3debefaa4ca07b7 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/move_binary_operator_before_reshape.cc @@ -54,7 +54,10 @@ bool IsTailOfShape(const Shape& tail, const Shape& shape) { // // Note we are testing for one particular case of a broader set of possible // binary-reshape op transformations. This transformation could be generalized. -bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { +::tensorflow::Status MoveBinaryOperatorBeforeReshape::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto binary_it = model->operators.begin() + op_index; Operator* binary_op = binary_it->get(); if (binary_op->type != OperatorType::kAdd && @@ -69,7 +72,7 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { binary_op->type != OperatorType::kLessEqual && binary_op->type != OperatorType::kGreater && binary_op->type != OperatorType::kGreaterEqual) { - return false; + return ::tensorflow::Status::OK(); } // BINARY OP INPUT CHECKS @@ -81,11 +84,11 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { if (!input_is_const[0] && !input_is_const[1]) { // To limit our scope, we require one constant input. Though there's no // reason this transformation wouldn't work with all variable inputs. - return false; + return ::tensorflow::Status::OK(); } if (input_is_const[0] && input_is_const[1]) { // Both inputs are constants. 
Leave this for constants propagation. - return false; + return ::tensorflow::Status::OK(); } const int constant_input_idx = input_is_const[0] ? 0 : 1; const int variable_input_idx = input_is_const[0] ? 1 : 0; @@ -98,13 +101,13 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { AddMessageF( "Not moving %s because it's non-constant input shape is not resolved.", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } if (!IsTailOfShape( model->GetArray(binary_op->inputs[constant_input_idx]).shape(), model->GetArray(binary_op->inputs[variable_input_idx]).shape())) { // Constant array shape must be the latter part of the variable shape. - return false; + return ::tensorflow::Status::OK(); } // RESHAPE OP CHECKS @@ -113,13 +116,13 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { if (reshape_it == model->operators.end()) { AddMessageF("Not moving %s because it's variable input is not connected.", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } Operator* reshape_op = reshape_it->get(); if (reshape_op->type != OperatorType::kReshape) { AddMessageF("Not moving %s because the preceding %s is not a reshape op", LogName(*binary_op), LogName(*reshape_op)); - return false; + return ::tensorflow::Status::OK(); } const auto& reshape_input_array = model->GetArray(reshape_op->inputs[0]); if (!reshape_input_array.has_shape()) { @@ -127,14 +130,14 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { "Not moving %s because it's non-constant input shape is not resolved " "yet", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } if (!IsTailOfShape( model->GetArray(binary_op->inputs[constant_input_idx]).shape(), model->GetArray(reshape_op->outputs[0]).shape())) { // Constant array shape must be the latter part of the binary op output // shape. 
- return false; + return ::tensorflow::Status::OK(); } // EXTRA CHECKS ON CONNECTING ARRAY @@ -143,7 +146,7 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { AddMessageF( "Not moving %s because the output of reshape op %s is an output op.", LogName(*binary_op), LogName(*reshape_op)); - return false; + return ::tensorflow::Status::OK(); } } int count_ops_consuming_output = @@ -154,7 +157,7 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { "Not moving %s because the output of reshape op %s is consumed by " "another op", LogName(*binary_op), LogName(*reshape_op)); - return false; + return ::tensorflow::Status::OK(); } // SWAP ORDER OF BINARY AND RESHAPE OPS @@ -172,7 +175,8 @@ bool MoveBinaryOperatorBeforeReshape::Run(Model* model, std::size_t op_index) { // Clear binary output shape so it will be re-propagated model->GetArray(binary_op->outputs[0]).clear_shape(); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc index cf17c49b1098d02468935aa72d1d1e73b4addbe1..9c1ed2b732dcce32deb780d196999c23347fb8e8 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc @@ -26,20 +26,21 @@ limitations under the License. 
namespace toco { -bool PropagateActivationFunctionIntoConstants::Run(Model* model, - std::size_t op_index) { +::tensorflow::Status PropagateActivationFunctionIntoConstants::Run( + Model* model, std::size_t op_index, bool* modified) { + *modified = false; const auto ac_it = model->operators.begin() + op_index; const auto* ac_op = ac_it->get(); if (ac_op->type != OperatorType::kRelu6 && ac_op->type != OperatorType::kRelu1 && ac_op->type != OperatorType::kRelu) { - return false; + return ::tensorflow::Status::OK(); } // Find the op producing the array passed to this activation function. auto* src_op = GetOpWithOutput(*model, ac_op->inputs[0]); if (!src_op) { - return false; + return ::tensorflow::Status::OK(); } // Ensure the src_op is not used without the activation function applied. @@ -57,7 +58,7 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model, src_op_input = src_op->inputs[0]; break; default: - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(src_op->outputs[0], ac_op->inputs[0]); @@ -69,7 +70,7 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model, "Not propagating activation function %s into %s:%s because it is not " "constant", LogName(*ac_op), LogName(*src_op), src_op_input); - return false; + return ::tensorflow::Status::OK(); } // Get the array we'll be working with and ensure it's a compatible type. 
@@ -79,7 +80,7 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model, "Not propagating activation function %s into %s:%s because it is " "non-float data", LogName(*ac_op), LogName(*src_op), src_op_input); - return false; + return ::tensorflow::Status::OK(); } auto& const_array_data = const_array.GetMutableBuffer().data; @@ -108,14 +109,15 @@ bool PropagateActivationFunctionIntoConstants::Run(Model* model, } default: LOG(FATAL) << "Unsupported activation function " << LogName(*ac_op); - return false; + return ::tensorflow::Status::OK(); } const_array_data[i] = new_value; } AddMessageF("Propagated activation function %s into %s:%s", LogName(*ac_op), LogName(*src_op), src_op_input); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc index 323eefcd3a7665a8c01da1bc10d6f8d80da7a15d..47faa20a291a0d5cecb79a70b311882683406af6 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc @@ -32,7 +32,10 @@ void SetDataTypeForAllOutputs(Model* model, Operator* op, } } // namespace -bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status PropagateArrayDataTypes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; auto* op = it->get(); @@ -40,7 +43,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { for (const auto& input : op->inputs) { if (!model->IsOptionalArray(input) && model->GetArray(input).data_type == ArrayDataType::kNone) { - return false; + return ::tensorflow::Status::OK(); } } // Record data 
types of output before processing, so we can see at the @@ -131,7 +134,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { auto* rand_op = static_cast(op); // The output type of RandomUniform is specified with an attribute if (rand_op->dtype == ArrayDataType::kNone) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(op->outputs.size(), 1); SetDataTypeForAllOutputs(model, op, rand_op->dtype); @@ -153,7 +156,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { // This can make unsupported_op->output_data_types have more elements than // op->outputs. if (unsupported_op->output_data_types.size() < op->outputs.size()) { - return false; + return ::tensorflow::Status::OK(); } for (int i = 0; i < op->outputs.size(); ++i) { const string& output = op->outputs[i]; @@ -164,7 +167,7 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { } case OperatorType::kExpandDims: { // Yield on ExpandDim until it is converted to Reshape - return false; + return ::tensorflow::Status::OK(); } case OperatorType::kSelect: { // Select produces outputs with the same type as their 2nd input @@ -236,6 +239,12 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { } break; } + case OperatorType::kUnidirectionalSequenceLstm: { + const ArrayDataType data_type = model->GetArray(op->inputs[0]).data_type; + if (data_type != ArrayDataType::kFloat) return ::tensorflow::Status::OK(); + SetDataTypeForAllOutputs(model, op, data_type); + break; + } default: { // These operators produce outputs with the same type as their 1st input CHECK_GT(op->inputs.size(), 0); @@ -248,10 +257,11 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { // Return true if any output data type changed, false if none changed. 
for (const auto& output : op->outputs) { if (old_output_data_types[output] != model->GetArray(output).data_type) { - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } - return false; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc index cd078ef189e922682098a0ec8dc4743060181aac..3cf191436dc8d572cb9b4adb5e1ef08d4e9a363a 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_default_min_max.cc @@ -39,7 +39,10 @@ bool SupportsMinMax(const Array& array) { // When provided a set of min/max values for uint8 arrays this will rescale // the values for other data types as required and preserving the floating point // range within the new type. -bool PropagateDefaultMinMax::Run(Model* model, std::size_t op_index) { +::tensorflow::Status PropagateDefaultMinMax::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; const auto* op = it->get(); @@ -61,7 +64,8 @@ bool PropagateDefaultMinMax::Run(Model* model, std::size_t op_index) { } } - return did_change; + *modified = did_change; + return ::tensorflow::Status::OK(); } // Sets the min/max on the given array, adjusting the reference_minmax for the diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc index 3ad6b0ec6f7a3c4a9a0ab3964c1198ee757ea4b5..d0113237ce6e43140704672c4cfa5866b7cf49a4 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fake_quant_num_bits.cc @@ -277,11 +277,14 @@ bool 
RecursivelyForwardPropagateDataType(GraphTransformation* transformation, // nice logging and integration with the graphviz video dumping mode. // In general you should not copy this style of transformation and stick to // local-only changes as seen in the other transformations. -bool PropagateFakeQuantNumBits::Run(Model* model, std::size_t op_index) { +::tensorflow::Status PropagateFakeQuantNumBits::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; auto* op = it->get(); if (op->type != OperatorType::kFakeQuant) { - return false; + return ::tensorflow::Status::OK(); } auto* fakequant_op = static_cast(op); @@ -290,7 +293,7 @@ bool PropagateFakeQuantNumBits::Run(Model* model, std::size_t op_index) { &quantized_data_type)) { AddMessageF("FakeQuant op %s num_bits=%d is out of range, ignoring", LogName(*op), fakequant_op->num_bits); - return false; + return ::tensorflow::Status::OK(); } const auto& final_minmax = *fakequant_op->minmax; @@ -311,7 +314,8 @@ bool PropagateFakeQuantNumBits::Run(Model* model, std::size_t op_index) { did_change |= RecursivelyForwardPropagateDataType(this, model, op, quantized_data_type); - return did_change; + *modified = did_change; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index d056a8add7a1875f8274c72b0f37b0fc5239223c..e861df2b3d0460aeedb3f3e19a589ab17b7687b4 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -946,6 +946,49 @@ void ProcessLstmCellOperator(Model* model, LstmCellOperator* op) { .copy_shape(activ_temp_shape); } +void ProcessUnidirectionalSequenceLstmOperator( + Model* model, UnidirectionalSequenceLstmOperator* op) { + auto& output_array = 
model->GetArray(op->outputs[0]); + if (output_array.has_shape()) { + // Shape already propagated + return; + } + + if (output_array.data_type == ArrayDataType::kNone) { + // Yield until the output type has been set by PropagateArrayDataTypes + return; + } + + // TODO(renjieliu): check the inputs, as well as all kinds of weights. + const auto& input_array = model->GetArray(op->inputs[0]); + // Yield until input dims have been resolved. + if (!input_array.has_shape()) { + return; + } + const auto& input_shape = input_array.shape(); + const int batch_size = input_shape.dims(1); + const int timestamp = input_shape.dims(0); + + const auto& recurrent_to_output_weights_array = + model->GetArray(op->inputs[8]); + // Yield until input dims have been resolved. + if (!recurrent_to_output_weights_array.has_shape()) { + return; + } + + constexpr int kInputActivationStateTensor = 18; + constexpr int kInputCellStateTensor = 19; + // b(115961645): This is a hack to work around. + model->GetArray(op->inputs[kInputActivationStateTensor]).buffer.reset(); + model->GetArray(op->inputs[kInputCellStateTensor]).buffer.reset(); + + const auto& output_weights_shape = recurrent_to_output_weights_array.shape(); + const int output_size = output_weights_shape.dims(1); + + Shape* output_shape = output_array.mutable_shape(); + output_shape->ReplaceDims({timestamp, batch_size, output_size}); +} + void ProcessSpaceToBatchNDOperator(Model* model, SpaceToBatchNDOperator* op) { const auto& input_array = model->GetArray(op->inputs[0]); // Yield until input dims have been resolved. 
@@ -1622,7 +1665,10 @@ void ProcessUnpackOperator(Model* model, UnpackOperator* op) { } // namespace -bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status PropagateFixedSizes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; auto* op = it->get(); std::unordered_map> old_output_dims; @@ -1797,6 +1843,10 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { ProcessResizeBilinearOperator(model, static_cast(op)); break; + case OperatorType::kUnidirectionalSequenceLstm: + ProcessUnidirectionalSequenceLstmOperator( + model, static_cast(op)); + break; case OperatorType::kLstmCell: ProcessLstmCellOperator(model, static_cast(op)); break; @@ -1836,7 +1886,7 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { static_cast(op); // Attribute can be not specified, ignore it. if (unsupported_op->output_shapes.size() < op->outputs.size()) { - return false; + return ::tensorflow::Status::OK(); } for (int i = 0; i < op->outputs.size(); ++i) { const string& output = op->outputs[i]; @@ -1886,10 +1936,11 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { (old_output_dims[output] != model->GetArray(output).shape().dims())) { AddMessageF("Set shape of %s to [%s]", output, absl::StrJoin(model->GetArray(output).shape().dims(), ",")); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } - return false; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc index fb299c31b793619c4fb5203211c79f4b32a82af3..29ea17dc612820bff3d648bbbc8b94bfc0dd9512 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc @@ -439,7 +439,9 @@ void FixMinMaxPostQuantization(GraphTransformation* 
transformation, } // namespace -bool Quantize::Run(Model* model, std::size_t op_index) { +::tensorflow::Status Quantize::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; // Our general "quantization" graph transformation consists in replacing // QuantizedInputArrays[] -> // DequantizeOperators[] -> @@ -460,7 +462,7 @@ bool Quantize::Run(Model* model, std::size_t op_index) { auto& op = *model->operators[op_index]; if (op.type == OperatorType::kDequantize || op.type == OperatorType::kFakeQuant) { - return false; + return ::tensorflow::Status::OK(); } // Our assumption here is that the input arrays are already quantized - @@ -497,7 +499,7 @@ bool Quantize::Run(Model* model, std::size_t op_index) { if (!array.minmax && !array.buffer) { LOG(ERROR) << "Can't quantize input array " << input << " because it lacks min/max info"; - return false; + return ::tensorflow::Status::OK(); } const auto* other_op = GetOpWithOutput(*model, input); if (other_op && other_op->type != OperatorType::kDequantize) { @@ -507,7 +509,7 @@ bool Quantize::Run(Model* model, std::size_t op_index) { "which means that we should yield and let other ops " "get quantized first", LogName(op), input); - return false; + return ::tensorflow::Status::OK(); } } } @@ -672,7 +674,8 @@ bool Quantize::Run(Model* model, std::size_t op_index) { } } - return changed; + *modified = changed; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc index eaa9d3bcda5e25da2e1a31bf37be804cbe15bdd0..0c32218ff2e972d0f0ab969930cc7ac84ef0dcc6 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/read_array_minmax_and_narrow_range_from_fake_quant.cc @@ 
-51,18 +51,19 @@ bool ApplyAttrsToArray(GraphTransformation* transformation, Model* model, } // end namespace -bool ReadArrayMinmaxAndNarrowRangeFromFakeQuant::Run(Model* model, - std::size_t op_index) { +::tensorflow::Status ReadArrayMinmaxAndNarrowRangeFromFakeQuant::Run( + Model* model, std::size_t op_index, bool* modified) { + *modified = false; const auto fakequant_it = model->operators.begin() + op_index; auto* fakequant_base_op = fakequant_it->get(); if (fakequant_base_op->type != OperatorType::kFakeQuant) { - return false; + return ::tensorflow::Status::OK(); } auto* fq_op = static_cast(fakequant_base_op); if (!fq_op->minmax) { // Need to be resolved first by ResolveFakeQuantArgsFromVars. - return false; + return ::tensorflow::Status::OK(); } // At this point, this FakeQuantOperator should have a MinMax @@ -74,7 +75,8 @@ bool ReadArrayMinmaxAndNarrowRangeFromFakeQuant::Run(Model* model, bool changed = false; changed |= ApplyAttrsToArray(this, model, *fq_op, fq_op->inputs[0]); changed |= ApplyAttrsToArray(this, model, *fq_op, fq_op->outputs[0]); - return changed; + *modified = changed; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc index c3b2709a33d54213661ba96394b01aa2cfd1a278..fe8023ab8fe1d88813f3c78436f7cba650a27c30 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_final_dequantize_op.cc @@ -25,11 +25,14 @@ limitations under the License. 
namespace toco { -bool RemoveFinalDequantizeOp::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveFinalDequantizeOp::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto dequantize_it = model->operators.begin() + op_index; const auto* dequantize_op = dequantize_it->get(); if (dequantize_op->type != OperatorType::kDequantize) { - return false; + return ::tensorflow::Status::OK(); } const auto& output = dequantize_op->outputs[0]; // We can remove any dequantize op whose output is not consumed by @@ -38,7 +41,7 @@ bool RemoveFinalDequantizeOp::Run(Model* model, std::size_t op_index) { // in the middle of the graph might be designated as an output // array. if (CountOpsWithInput(*model, output)) { - return false; + return ::tensorflow::Status::OK(); } // If one of the model's output arrays was actually the Dequantize op's @@ -53,7 +56,8 @@ bool RemoveFinalDequantizeOp::Run(Model* model, std::size_t op_index) { AddMessageF("Removed final %s", LogName(*dequantize_op)); model->EraseArray(output); model->operators.erase(dequantize_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc index 73ad326299bbd929afbb8dda2c41b97a126afbe1..be8c0acc7b5cc6f06083c0da6f694e40fbd64b6f 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_assert.cc @@ -23,11 +23,14 @@ limitations under the License. 
namespace toco { -bool RemoveTensorFlowAssert::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTensorFlowAssert::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto assert_it = model->operators.begin() + op_index; const auto* assert_op = assert_it->get(); if (assert_op->type != OperatorType::kAssert) { - return false; + return ::tensorflow::Status::OK(); } bool changed = false; @@ -54,7 +57,8 @@ bool RemoveTensorFlowAssert::Run(Model* model, std::size_t op_index) { // That's it. We can stop here, no need to duplicate the work that // RemoveUnusedOp will do removing this now-unused node. - return changed; + *modified = changed; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc index 7ec7752f25dad1c24b821733c0e6dafbd1cd8bf2..37fe5fa3d7190c3c9a6795425c4840e3284aa252 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_tensorflow_identity.cc @@ -25,14 +25,18 @@ limitations under the License. 
namespace toco { -bool RemoveTensorFlowIdentity::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTensorFlowIdentity::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto passthru_it = model->operators.begin() + op_index; const auto* passthru_op = passthru_it->get(); if (passthru_op->type != OperatorType::kIdentity) { - return false; + return ::tensorflow::Status::OK(); } - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc index 0dfdc40e4c3410330135736690af4a85b42a0041..68c6fb65c5c6b8460819c5bc5c50ebf485524733 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_binary.cc @@ -46,14 +46,17 @@ bool AreAllBufferElementsEqualTo(const std::vector& buffer_data, // For example, an Add operator is trivial if // one of its operands is constant 0, a Mul operator is trivial // if one of its operands is constant 1, etc. 
-bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTrivialBinaryOperator::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto binary_it = model->operators.begin() + op_index; auto* binary_op = binary_it->get(); if (binary_op->type != OperatorType::kAdd && binary_op->type != OperatorType::kMul && binary_op->type != OperatorType::kSub && binary_op->type != OperatorType::kDiv) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(binary_op->inputs.size(), 2); @@ -66,12 +69,12 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) { }; if (!is_input_constant[0] && !is_input_constant[1]) { // Neither input is constant, so nothing we can resolve here. - return false; + return ::tensorflow::Status::OK(); } if (is_input_constant[0] && is_input_constant[1]) { // Both inputs are constants. That's a job for constants // propagation, not for us to handle here. - return false; + return ::tensorflow::Status::OK(); } const int index_of_constant_input = is_input_constant[0] ? 0 : 1; const int index_of_variable_input = is_input_constant[0] ? 1 : 0; @@ -84,7 +87,7 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) { const auto& input_array_1 = model->GetArray(binary_op->inputs[1]); if (!input_array_0.has_shape() || !input_array_1.has_shape()) { // Both input shapes must be known. 
- return false; + return ::tensorflow::Status::OK(); } if (input_array_0.shape().dimensions_count() == input_array_1.shape().dimensions_count() && @@ -94,7 +97,7 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) { "(lhs %s, rhs %s)", LogName(*binary_op), ShapeToString(input_array_0.shape()), ShapeToString(input_array_1.shape())); - return false; + return ::tensorflow::Status::OK(); } // Now check if the constant operand makes this binary @@ -103,7 +106,7 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) { model->GetArray(binary_op->inputs[index_of_constant_input]); // For now, we only handle floats here. if (constant_input_array.data_type != ArrayDataType::kFloat) { - return false; + return ::tensorflow::Status::OK(); } const auto& constant_input_float_data = constant_input_array.GetBuffer().data; @@ -121,12 +124,13 @@ bool RemoveTrivialBinaryOperator::Run(Model* model, std::size_t op_index) { } if (!is_trivial) { - return false; + return ::tensorflow::Status::OK(); } // Now we know that this node is trivial, so we can remove it. AddMessageF("Removing trivial %s", LogName(*binary_op)); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc index 3ceb93d8eedbb3743be112e6bd03cfe3e6f74d13..faaa2a828e306cbaf127cf1554a38238580ef435 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation.cc @@ -25,16 +25,20 @@ limitations under the License. 
namespace toco { -bool RemoveTrivialConcatenation::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTrivialConcatenation::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto concat_it = model->operators.begin() + op_index; auto* concat_op = concat_it->get(); if (concat_op->type != OperatorType::kConcatenation) { - return false; + return ::tensorflow::Status::OK(); } if (concat_op->inputs.size() != 1) { - return false; + return ::tensorflow::Status::OK(); } - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc index 936854a04fd600ea23ab5dda50370f85a311c28c..ccfc181fe007455b69f44a29f5323deb39f1a0b9 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_concatenation_input.cc @@ -25,7 +25,10 @@ limitations under the License. namespace toco { -bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTrivialConcatenationInput::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; // TensorFlow allows Concatenation nodes to have 0-D inputs, // and they are then treated as empty i.e. omitted from concatenation, // in violation of the notion that 0-D is equivalent to 1x1x1x1. 
@@ -36,7 +39,7 @@ bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) { const auto concat_it = model->operators.begin() + op_index; auto* concat_op = concat_it->get(); if (concat_op->type != OperatorType::kConcatenation) { - return false; + return ::tensorflow::Status::OK(); } std::vector trivial_inputs; std::vector nontrivial_inputs; @@ -52,7 +55,7 @@ bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) { } if (trivial_inputs.empty()) { - return false; + return ::tensorflow::Status::OK(); } // Drop trivial inputs. @@ -63,7 +66,8 @@ bool RemoveTrivialConcatenationInput::Run(Model* model, std::size_t op_index) { } } concat_op->inputs = nontrivial_inputs; - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc index 2c8d04440f251f792d2a09155dd26fc01a732109..5448a816bc43ac803cd2022c6659584683907280 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_fake_quant.cc @@ -64,23 +64,27 @@ bool IsFakeQuantTrivial(GraphTransformation* transformation, const Model& model, } // namespace // Removes FakeQuant ops that are trivial (have no effect, are redundant, etc). 
-bool RemoveTrivialFakeQuant::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTrivialFakeQuant::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto op_it = model->operators.begin() + op_index; auto* op = op_it->get(); if (op->type != OperatorType::kFakeQuant) { - return false; + return ::tensorflow::Status::OK(); } auto* fakequant_op = static_cast(op); if (!IsFakeQuantTrivial(this, *model, *fakequant_op)) { AddMessageF("%s is not trivial", LogName(*fakequant_op)); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Removing trivial %s", LogName(*fakequant_op)); CHECK_EQ(fakequant_op->inputs.size(), 1); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc index 752560e075a087bcc2b0a3cb19dad484fb582d42..4133815285fdc500e6a112eeedff1bbf6bca66ab 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc @@ -94,12 +94,13 @@ bool IsTrivialFusedActivationFunc( // Attempts to remove both fused and unfused activation functions if the // quantization params indicate that the representable values fall inside the // activation range. 
-bool RemoveTrivialQuantizedActivationFunc::Run(Model* model, - std::size_t op_index) { +::tensorflow::Status RemoveTrivialQuantizedActivationFunc::Run( + Model* model, std::size_t op_index, bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; auto* op = it->get(); if (op->inputs.empty()) { - return false; + return ::tensorflow::Status::OK(); } if (IsTrivialUnfusedActivationFunc(this, *model, op->type, op->inputs[0])) { @@ -107,7 +108,8 @@ bool RemoveTrivialQuantizedActivationFunc::Run(Model* model, "Removing trivial unfused activation function %s because the input " "minmax imply at least as tight a clamp anyway.", LogName(*op)); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } if (IsTrivialFusedActivationFunc(this, *model, op->fused_activation_function, op->outputs[0])) { @@ -117,9 +119,10 @@ bool RemoveTrivialQuantizedActivationFunc::Run(Model* model, "because the output quantization parameters imply at least as tight " "a clamp anyway.", LogName(*op)); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } - return false; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc index 142c876b154755ac9c6b93e560f22ec8d6ec6563..0f0ae4af6937283d60985292aea3cb5166aa9b1e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_min_max.cc @@ -69,22 +69,26 @@ bool IsTrivialMinMax(GraphTransformation* transformation, const Model& model, // Attempts to remove min/max functions if the quantization params indicate that // the representable values fall inside the clip range. 
-bool RemoveTrivialQuantizedMinMax::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTrivialQuantizedMinMax::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; auto* op = it->get(); if ((op->type != OperatorType::kMinimum && op->type != OperatorType::kMaximum) || op->inputs.size() != 2) { - return false; + return ::tensorflow::Status::OK(); } if (IsTrivialMinMax(this, *model, op->type, op->inputs[0], op->inputs[1])) { AddMessageF( "Removing trivial min/max %s because the quantization parameters imply " "at least as tight a clamp anyway.", LogName(*op)); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } - return false; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc index 5295eeccecb05b05232922f4b5e4ef75a2b04672..1caf94487979848b2c58ddcdf3ddfadf79c40567 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc @@ -81,22 +81,26 @@ bool IsReshapeTrivial(const Model& model, const Operator& op, } // namespace -bool RemoveTrivialReshape::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTrivialReshape::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto reshape_it = model->operators.begin() + op_index; auto* reshape_op = reshape_it->get(); if (reshape_op->type != OperatorType::kReshape) { - return false; + return ::tensorflow::Status::OK(); } if (!IsReshapeTrivial(*model, *reshape_op, this)) { AddMessageF("%s is not trivial", LogName(*reshape_op)); - return false; + return ::tensorflow::Status::OK(); } 
AddMessageF("Removing trivial %s", LogName(*reshape_op)); CHECK_EQ(reshape_op->inputs.size(), 2); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc index 0cbbcd7c814d38e32ee55e9d9271adf532d20924..dcb0148d583f1c1c99d49e0699e3c18d35508cc2 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_slice.cc @@ -49,21 +49,24 @@ bool IsSliceTrivial(const Model& model, const Operator& op, } // namespace -bool RemoveTrivialSlice::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveTrivialSlice::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; const auto reshape_it = model->operators.begin() + op_index; auto* slice_op = reshape_it->get(); if (slice_op->type != OperatorType::kSlice) { - return false; + return ::tensorflow::Status::OK(); } if (!IsSliceTrivial(*model, *slice_op, this)) { - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Removing trivial %s", LogName(*slice_op)); CHECK_EQ(slice_op->inputs.size(), 3); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc index dde91234a8240f4518cd105c2cc4e79102735980..3cd5d06baebc5a7a1807f156a52a24675343be7d 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_unused_op.cc @@ -25,7 +25,9 @@ 
limitations under the License. namespace toco { -bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) { +::tensorflow::Status RemoveUnusedOp::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; const auto* op = it->get(); @@ -58,7 +60,7 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) { } for (const string& output_array : model->flags.output_arrays()) { if (output == output_array) { - return false; + return ::tensorflow::Status::OK(); } } for (const auto& rnn_state : model->flags.rnn_states()) { @@ -67,19 +69,19 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) { if (!IsDiscardableArray(*model, rnn_state.back_edge_source_array()) || !IsDiscardableArray(*model, rnn_state.state_array()) || CountOpsWithInput(*model, rnn_state.state_array())) { - return false; + return ::tensorflow::Status::OK(); } } } if (CountOpsWithInput(*model, output)) { - return false; + return ::tensorflow::Status::OK(); } } if (op->unresolved_outputs) { AddMessageF("Not discarding %s because it has unresolved outputs.", LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Discarding %s because none of its outputs is used.", @@ -105,7 +107,8 @@ bool RemoveUnusedOp::Run(Model* model, std::size_t op_index) { } } model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc index 550de83018f25a7aa4da82707fedb86434615fb0..3c8d41108918ad6b4232fcc0168c9e0cd65100e5 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/reorder_elementwise_unary.cc @@ -63,29 +63,32 @@ bool IsMoveOperator(OperatorType optype) { // Swap elementwise 
operators such that all value operators occur before all // element move operators, e.g. negation then transpose. -bool ReorderElementwiseUnary::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ReorderElementwiseUnary::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto element_op_it = model->operators.begin() + op_index; std::unique_ptr& element_op = *element_op_it; if (!IsElementwiseOperator(element_op->type)) { - return false; + return ::tensorflow::Status::OK(); } const string intermediate_name = element_op->inputs[0]; auto it = FindOpWithOutput(*model, intermediate_name); if (it == model->operators.end()) { AddMessageF("No preceding operator"); - return false; + return ::tensorflow::Status::OK(); } std::unique_ptr& move_op = *it; if (!IsMoveOperator(move_op->type)) { AddMessageF("Preceding operator is not a move operator"); - return false; + return ::tensorflow::Status::OK(); } if (CountOpsWithInput(*model, intermediate_name) != 1) { AddMessageF("Input %s used elsewhere", intermediate_name); - return false; + return ::tensorflow::Status::OK(); } // Check that the intermediate is discardable. @@ -94,7 +97,7 @@ bool ReorderElementwiseUnary::Run(Model* model, std::size_t op_index) { "Cannot swap elementwise as it would invalidate %s which is " "an output array.", intermediate_name); - return false; + return ::tensorflow::Status::OK(); } // op->inputs may change so we need to keep a value by copy. @@ -147,7 +150,8 @@ bool ReorderElementwiseUnary::Run(Model* model, std::size_t op_index) { // Swap the order of the operators. 
element_op.swap(move_op); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc index c907a597cb719b68dbf36868a75e49a7c5181423..a2c06e71e8ec93352b539e42d7020db625e55bf1 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/reorder_reshape_transpose.cc @@ -101,37 +101,40 @@ std::vector ComputeNewPerm(std::vector input_dims, // Swaps reshape-transpose to transpose-reshape whenever possible. This is // possible when the reshape does not affect memory ordering. -bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ReorderReshapeTranspose::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto transpose_it = model->operators.begin() + op_index; TransposeOperator* transpose_op = ConvertOperator( transpose_it->get(), OperatorType::kTranspose); if (transpose_op == nullptr) { - return false; + return ::tensorflow::Status::OK(); } if (!OperatorReady(*model, transpose_op) || transpose_op->perm.empty()) { // Wait for values to propagate. - return false; + return ::tensorflow::Status::OK(); } // Find the operator that produces the transpose op. auto reshape_it = FindOpWithOutput(*model, transpose_op->inputs[0]); if (reshape_it == model->operators.end()) { - return false; + return ::tensorflow::Status::OK(); } TensorFlowReshapeOperator* reshape_op = ConvertOperator(reshape_it->get(), OperatorType::kReshape); if (reshape_op == nullptr) { - return false; + return ::tensorflow::Status::OK(); } // Ignore if the reshape is uninitialized. if (!OperatorReady(*model, reshape_op) || reshape_op->shape.empty()) { - return false; + return ::tensorflow::Status::OK(); } // Need to copy to keep static if permutated. 
@@ -142,7 +145,7 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) { // Intermediate should not be consumed by any other operators. if (CountOpsWithInput(*model, intermediate_name) != 1) { AddMessageF("Input %s used elsewhere", intermediate_name); - return false; + return ::tensorflow::Status::OK(); } // Check that the intermediate is not an output array. @@ -151,7 +154,7 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) { "Cannot reorder reshape-transpose as it would invalidate %s which is " "an output array.", intermediate_name); - return false; + return ::tensorflow::Status::OK(); } // Get the arrays. @@ -173,7 +176,7 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) { // dimensions then it can be moved between the transpose. if (!ReshapeIsEquivalentToTranspose(*model, reshape_op, true /*allow_extra_unary_dims*/)) { - return false; + return ::tensorflow::Status::OK(); } if (!IsDiscardableArray(*model, output_name)) { @@ -242,7 +245,8 @@ bool ReorderReshapeTranspose::Run(Model* model, std::size_t op_index) { // Swap the order of the operators. transpose_it->swap(*reshape_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc index 8f2c1f81628398d8c823d27ff50d59e80497d0e1..a79779f55d9c138575267faf3ae6c67791255cb1 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_normalization.cc @@ -25,10 +25,13 @@ limitations under the License. 
namespace toco { -bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveBatchNormalization::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto bn_it = model->operators.begin() + op_index; if (bn_it->get()->type != OperatorType::kBatchNormalization) { - return false; + return ::tensorflow::Status::OK(); } const auto* bn_op = static_cast(bn_it->get()); @@ -53,7 +56,7 @@ bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) { // so we need to exit early if these buffers don't exist (i.e. if the params // haven't yet been resolved as constants). if (!mean_array.buffer || !multiplier_array.buffer || !offset_array.buffer) { - return false; + return ::tensorflow::Status::OK(); } // Create the new Mul, Add operators @@ -142,7 +145,8 @@ bool ResolveBatchNormalization::Run(Model* model, std::size_t op_index) { DCHECK_EQ(bn_it->get(), bn_op); model->operators.erase(bn_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc index b8b35161d77e5b6dd8c30e03959dba3c60d1d56c..d039d7d690d715874f8f42a9e9d6b9114d5dca8e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_batch_to_space_nd_attributes.cc @@ -24,31 +24,35 @@ limitations under the License. 
namespace toco { -bool ResolveBatchToSpaceNDAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveBatchToSpaceNDAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto op_it = model->operators.begin() + op_index; - if (op_it->get()->type != OperatorType::kBatchToSpaceND) return false; + if (op_it->get()->type != OperatorType::kBatchToSpaceND) + return ::tensorflow::Status::OK(); auto* op = static_cast(op_it->get()); // The attributes are resolved only when the 3 attributes (block_shape, // before_crops, after_crops) are all constant. if (!op->block_shape.empty()) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(op->inputs.size(), 3); if (!IsConstantParameterArray(*model, op->inputs[1]) || !IsConstantParameterArray(*model, op->inputs[2])) - return false; + return ::tensorflow::Status::OK(); // Handle crops const auto& crops_array = model->GetArray(op->inputs[2]); - if (!crops_array.has_shape()) return false; + if (!crops_array.has_shape()) return ::tensorflow::Status::OK(); const std::vector& crops_dims = crops_array.shape().dims(); if (crops_dims.size() != 2) { // Code only handles crops of 2 dimensions. Perhaps another transformation // will delete this op. 
- return false; + return ::tensorflow::Status::OK(); } const std::vector& crops_buffer = crops_array.GetBuffer().data; @@ -59,7 +63,7 @@ bool ResolveBatchToSpaceNDAttributes::Run(Model* model, std::size_t op_index) { // Handle block_shape const auto& block_shape_array = model->GetArray(op->inputs[1]); - if (!block_shape_array.has_shape()) return false; + if (!block_shape_array.has_shape()) return ::tensorflow::Status::OK(); const std::vector& block_shape_dims = block_shape_array.shape().dims(); CHECK_EQ(block_shape_dims.size(), 1); const std::vector& block_shape_buffer = @@ -68,7 +72,8 @@ bool ResolveBatchToSpaceNDAttributes::Run(Model* model, std::size_t op_index) { op->block_shape.push_back(block_shape_buffer[i]); } - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc index f7e5aa6609bd4f7eb2a95750125e30a7803b36e1..586f546a30da32f9c2ccfa2ce135f299bcd252ff 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_binary.cc @@ -188,7 +188,10 @@ void EvaluateBinaryOperatorOnConstantInputs(Model* model, } } // namespace -bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantBinaryOperator::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto binary_it = model->operators.begin() + op_index; const auto* binary_op = binary_it->get(); // Test for binary ops of types that we know how to resolve @@ -204,7 +207,7 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) { binary_op->type != OperatorType::kLessEqual && binary_op->type != OperatorType::kGreater && binary_op->type != OperatorType::kGreaterEqual) { - return false; + return 
::tensorflow::Status::OK(); } CHECK_EQ(binary_op->inputs.size(), 2); @@ -212,13 +215,13 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) { const auto& input1_array = model->GetArray(binary_op->inputs[1]); // Check if both inputs are constant parameters. if (!input0_array.buffer || !input1_array.buffer) { - return false; + return ::tensorflow::Status::OK(); } auto& output_array = model->GetArray(binary_op->outputs[0]); // Yield until the output array dims have been resolved. if (!output_array.has_shape()) { - return false; + return ::tensorflow::Status::OK(); } // At the moment we don't want to care about fused activation functions. @@ -229,7 +232,7 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) { AddMessageF( "Not resolving constant %s because it has a fused activation function", LogName(*binary_op)); - return false; + return ::tensorflow::Status::OK(); } // Check that input data types agree. @@ -253,7 +256,8 @@ bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) { AddMessageF("Resolved constant %s to the equivalent constant array", LogName(*binary_op)); model->operators.erase(binary_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc index d916ae0ddf017fe6a2fb2709db6e9de8c258adfc..0c60fdfeb3775a7203fee746c27089e67cbed236 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_concatenation.cc @@ -135,11 +135,14 @@ void SetMinMaxForConcatenedArray(GraphTransformation* transformation, } // namespace // Resolves the concatenation operator if all its inputs are constant arrays. 
-bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantConcatenation::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto concat_it = model->operators.begin() + op_index; const auto* concat_base_op = concat_it->get(); if (concat_base_op->type != OperatorType::kConcatenation) { - return false; + return ::tensorflow::Status::OK(); } const auto* concat_op = static_cast(concat_base_op); @@ -149,11 +152,15 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) { // We also make sure the shapes of the input arrays are known and they are // all discardable. const Operator* input_op = GetOpWithOutput(*model, input_name); - if (input_op) return false; - if (!IsConstantParameterArray(*model, input_name)) return false; - if (!model->GetArray(input_name).has_shape()) return false; - if (model->GetArray(input_name).quantization_params) return false; - if (!IsDiscardableArray(*model, input_name)) return false; + if (input_op) return ::tensorflow::Status::OK(); + if (!IsConstantParameterArray(*model, input_name)) + return ::tensorflow::Status::OK(); + if (!model->GetArray(input_name).has_shape()) + return ::tensorflow::Status::OK(); + if (model->GetArray(input_name).quantization_params) + return ::tensorflow::Status::OK(); + if (!IsDiscardableArray(*model, input_name)) + return ::tensorflow::Status::OK(); } const int concatenation_axis = concat_op->axis; @@ -205,7 +212,8 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) { // Remove concatenate operator. 
model->operators.erase(concat_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc index f5f2f77460c7624298d8e49a0ea30527a45bd960..4f330fdd84015331043e565feebaef246a262dd5 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc @@ -59,11 +59,14 @@ void GetBoundsForQuantizedDataType(ArrayDataType quantized_data_type, } } -bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantFakeQuant::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto fakequant_it = model->operators.begin() + op_index; const auto* fakequant_base_op = fakequant_it->get(); if (fakequant_base_op->type != OperatorType::kFakeQuant) { - return false; + return ::tensorflow::Status::OK(); } const auto* fakequant_op = @@ -71,12 +74,12 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) { // Yield until the fakequant MinMax has been resolved. if (!fakequant_op->minmax) { - return false; + return ::tensorflow::Status::OK(); } // This transformation only applies when the input array is constant. 
if (!IsConstantParameterArray(*model, fakequant_op->inputs[0])) { - return false; + return ::tensorflow::Status::OK(); } const auto& input_array = model->GetArray(fakequant_op->inputs[0]); @@ -87,7 +90,7 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) { if (!InferQuantizedDataTypeFromFakeQuant(*fakequant_op, &quantized_data_type)) { AddMessageF("Unsupported FakeQuant num_bits=%d", fakequant_op->num_bits); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Resolving constant %s", LogName(*fakequant_op)); @@ -136,7 +139,8 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) { } model->operators.erase(fakequant_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc index f6f95481b57f58f497b119df73d331f13d9705c0..5400d395ffacef225a482b3de4e3218e10b9d59f 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fill.cc @@ -41,11 +41,14 @@ bool ComputeFillArray(Model* model, FillOperator* op) { return true; } -bool ResolveConstantFill::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantFill::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto fill_it = model->operators.begin() + op_index; auto* base_op = fill_it->get(); if (base_op->type != OperatorType::kFill) { - return false; + return ::tensorflow::Status::OK(); } auto* op = static_cast(base_op); @@ -55,44 +58,44 @@ bool ResolveConstantFill::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes - return false; + 
return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes - return false; + return ::tensorflow::Status::OK(); } const auto& val_array = model->GetArray(op->inputs[1]); if (!val_array.has_shape()) { // Yield until the value shape has been resolved. - return false; + return ::tensorflow::Status::OK(); } if (!IsConstantParameterArray(*model, op->inputs[1])) { // Yield until the value is constant. - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(RequiredBufferSizeForShape(val_array.shape()), 1); switch (output_array.data_type) { case ArrayDataType::kFloat: if (!ComputeFillArray(model, op)) { - return false; + return ::tensorflow::Status::OK(); } break; case ArrayDataType::kUint8: if (!ComputeFillArray(model, op)) { - return false; + return ::tensorflow::Status::OK(); } break; case ArrayDataType::kInt32: if (!ComputeFillArray(model, op)) { - return false; + return ::tensorflow::Status::OK(); } break; case ArrayDataType::kInt64: if (!ComputeFillArray(model, op)) { - return false; + return ::tensorflow::Status::OK(); } break; default: @@ -114,7 +117,8 @@ bool ResolveConstantFill::Run(Model* model, std::size_t op_index) { // Erase the operator model->operators.erase(fill_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc index 36d7dad0ce9de81ec132ef992538b6022916bfbd..6e3a6a69c23e8dd77ddc26c63f5d36f5030d0129 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc @@ -61,11 +61,14 @@ inline void Gather(const Array& input_array, int input_rank, // Resolves a constant Gather operation. 
// This simply performs the gather and produces the output array with the // appropriate values. -bool ResolveConstantGather::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantGather::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kGather) { - return false; + return ::tensorflow::Status::OK(); } const auto* op = static_cast(base_op); @@ -74,28 +77,28 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes. - return false; + return ::tensorflow::Status::OK(); } if (!op->axis) { // Yield until axis has been set by ResolveGatherAttributes. - return false; + return ::tensorflow::Status::OK(); } if (op->axis.value() != 0) { // Only handling axis=0 for now. AddMessageF("%s has axis %d; only axis=0 is supported", LogName(*op), op->axis.value()); - return false; + return ::tensorflow::Status::OK(); } // We require constant inputs. if (!IsConstantParameterArray(*model, op->inputs[0]) || !IsConstantParameterArray(*model, op->inputs[1])) { - return false; + return ::tensorflow::Status::OK(); } const Array& input_array = model->GetArray(op->inputs[0]); const Array& coords_array = model->GetArray(op->inputs[1]); @@ -142,7 +145,8 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) { // Erase the operator. 
model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc index e86616574d5a0f1345cde167d4ce0d41665d5a02..e257ec37e858076359068d49b00262a2e42b1e19 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_pack.cc @@ -49,11 +49,14 @@ void Pack(Model* model, PackOperator const& op) { } // namespace -bool ResolveConstantPack::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantPack::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kPack) { - return false; + return ::tensorflow::Status::OK(); } const auto* op = static_cast(base_op); @@ -62,18 +65,18 @@ bool ResolveConstantPack::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes - return false; + return ::tensorflow::Status::OK(); } for (const auto& input : op->inputs) { if (!IsConstantParameterArray(*model, input)) { // Yield if any input is mutable - return false; + return ::tensorflow::Status::OK(); } } @@ -111,7 +114,8 @@ bool ResolveConstantPack::Run(Model* model, std::size_t op_index) { // Erase the operator model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git 
a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc index 88d06d7dc75005c89a69b881aa0064d1162227d5..db0fbba52826ebcb70196ee3ab25c199b96cea91 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_random_uniform.cc @@ -59,11 +59,14 @@ bool ComputeRandomUniformArray(Model* model, RandomUniformOperator* op) { return true; } -bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantRandomUniform::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; auto* base_op = it->get(); if (base_op->type != OperatorType::kRandomUniform) { - return false; + return ::tensorflow::Status::OK(); } auto* op = static_cast(base_op); @@ -73,12 +76,12 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes - return false; + return ::tensorflow::Status::OK(); } if ((op->seed == 0) && (op->seed2 == 0)) { @@ -86,13 +89,13 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) { << "\" is truly random (using /dev/random system entropy). " "Therefore, cannot resolve as constant. 
Set \"seed\" or " "\"seed2\" attr non-zero to fix this"; - return false; + return ::tensorflow::Status::OK(); } switch (output_array.data_type) { case ArrayDataType::kFloat: if (!ComputeRandomUniformArray(model, op)) { - return false; + return ::tensorflow::Status::OK(); } break; // For future support of double or half. @@ -110,7 +113,8 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) { // Erase the operator model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc index 1a0ba9e2bc7235720b59210cdd6affa089613077..069d4dafaabd41e79adc27083f8c14e164e22ff3 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_range.cc @@ -19,11 +19,14 @@ limitations under the License. namespace toco { -bool ResolveConstantRange::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantRange::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; auto* base_op = it->get(); if (base_op->type != OperatorType::kRange) { - return false; + return ::tensorflow::Status::OK(); } auto* op = static_cast(base_op); @@ -31,23 +34,23 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) { const auto& start_array = model->GetArray(op->inputs[0]); if (!start_array.has_shape()) { // Yield until all input dims have been resolved. - return false; + return ::tensorflow::Status::OK(); } const auto& limit_array = model->GetArray(op->inputs[1]); if (!limit_array.has_shape()) { // Yield until all input dims have been resolved. 
- return false; + return ::tensorflow::Status::OK(); } const auto& delta_array = model->GetArray(op->inputs[2]); if (!delta_array.has_shape()) { // Yield until all input dims have been resolved. - return false; + return ::tensorflow::Status::OK(); } for (const auto& input : op->inputs) { if (!IsConstantParameterArray(*model, input)) { // yield if any input is mutable - return false; + return ::tensorflow::Status::OK(); } } @@ -55,7 +58,7 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(RequiredBufferSizeForShape(start_array.shape()), 1) @@ -101,7 +104,8 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) { // Delete the operator model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc index a6f665b5f00ecc7b39821fa8e0b6170c176e8cf6..fccecef600fb47745ab4183a2727a8c6b81c77cc 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_reshape.cc @@ -22,11 +22,14 @@ limitations under the License. namespace toco { // Resolves a constant reshape operation by copying the buffer. 
-bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantReshape::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kReshape) { - return false; + return ::tensorflow::Status::OK(); } const auto* op = static_cast(base_op); @@ -36,17 +39,17 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) { // We require constant inputs. if (!IsConstantParameterArray(*model, op->inputs[0]) || !IsConstantParameterArray(*model, op->inputs[1])) { - return false; + return ::tensorflow::Status::OK(); } auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes. - return false; + return ::tensorflow::Status::OK(); } const Array& input_array = model->GetArray(op->inputs[0]); @@ -54,7 +57,7 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) { AddMessageF("Constant reshape is non-trivial (%s -> %s)", ShapeToString(input_array.shape()), ShapeToString(output_array.shape())); - return false; + return ::tensorflow::Status::OK(); } CHECK(!output_array.buffer); @@ -95,7 +98,7 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) { default: LOG(FATAL) << "Unsupported data type: " << ArrayDataTypeName(input_array.data_type); - return false; + return ::tensorflow::Status::OK(); } AddMessageF("Resolving constant reshape of %s", LogName(*op)); @@ -112,7 +115,8 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) { // Erase the operator. 
model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc index e880a3f44dab376e5e441e3d6c0f747ee8490489..ab1e0bd7a076acb75b0739026edcfdc5d78230df 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_select.cc @@ -27,11 +27,14 @@ namespace toco { // This implementation is looking strictly for all-or-nothing on the select // condition. It's possible to enhance this by looking per-element and possibly // producing a Mul op. -bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantSelect::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kSelect) { - return false; + return ::tensorflow::Status::OK(); } const auto* op = static_cast(base_op); @@ -40,23 +43,23 @@ bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes. - return false; + return ::tensorflow::Status::OK(); } // We require the cond input to be constant. 
if (!IsConstantParameterArray(*model, op->inputs[0])) { - return false; + return ::tensorflow::Status::OK(); } const Array& cond_array = model->GetArray(op->inputs[0]); CHECK(cond_array.data_type == ArrayDataType::kBool) << "Only bool conditions are supported"; const auto& cond_data = cond_array.GetBuffer().data; if (cond_data.empty()) { - return false; + return ::tensorflow::Status::OK(); } // Check if the condition is the same for all elements. @@ -67,12 +70,14 @@ bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) { "Cannot resolve %s as constant; cond_array has differing " "per-element values", LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } } // Pass-through the selected input. - return RemoveTrivialPassthroughOp(this, model, op_index, cond_value ? 1 : 2); + *modified = + RemoveTrivialPassthroughOp(this, model, op_index, cond_value ? 1 : 2); + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc index 8a0e3e8995839a737b5671701a97b514b0fc7bf1..a1756a820734d6d54d1d0527ce3ad5b7b1a8aef7 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_shape_or_rank.cc @@ -19,29 +19,32 @@ limitations under the License. 
namespace toco { -bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantShapeOrRank::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; const auto* op = it->get(); if (!(op->type == OperatorType::kShape || op->type == OperatorType::kRank)) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(op->outputs.size(), 1); auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been resolved - return false; + return ::tensorflow::Status::OK(); } const auto& input_array = model->GetArray(op->inputs[0]); if (!input_array.has_shape()) { // Yield until the input array's shape has been resolved. - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been resolved. - return false; + return ::tensorflow::Status::OK(); } // Compute the output @@ -65,7 +68,8 @@ bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) { } model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc index b35c3e19c43b1c62e6bdbfe379631480e1d41703..869dfae98e6a3b24becada0ff9a08d8e7b46130e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_slice.cc @@ -86,11 +86,14 @@ bool Slice(SliceOperator const& op, Array const& input_array, } // namespace -bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantSlice::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const 
auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kSlice) { - return false; + return ::tensorflow::Status::OK(); } const SliceOperator* op = static_cast(base_op); @@ -99,49 +102,49 @@ bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes. - return false; + return ::tensorflow::Status::OK(); } if (op->begin.empty() || op->size.empty()) { // Attributes have not resolved yet. - return false; + return ::tensorflow::Status::OK(); } const auto& input_array = model->GetArray(op->inputs[0]); if (!input_array.has_shape()) { // Yield until the value shape has been resolved. - return false; + return ::tensorflow::Status::OK(); } if (!IsConstantParameterArray(*model, op->inputs[0])) { // Yield until the value is constant. 
- return false; + return ::tensorflow::Status::OK(); } CHECK(!output_array.buffer); switch (output_array.data_type) { case ArrayDataType::kFloat: if (!Slice(*op, input_array, &output_array)) { - return false; + return ::tensorflow::Status::OK(); } break; case ArrayDataType::kUint8: if (!Slice(*op, input_array, &output_array)) { - return false; + return ::tensorflow::Status::OK(); } break; case ArrayDataType::kInt32: if (!Slice(*op, input_array, &output_array)) { - return false; + return ::tensorflow::Status::OK(); } break; case ArrayDataType::kInt64: if (!Slice(*op, input_array, &output_array)) { - return false; + return ::tensorflow::Status::OK(); } break; default: @@ -159,7 +162,8 @@ bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) { // Erase the operator model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc index 8853ed87e61eaf66d323dc38630816c3a98abaf7..99c5a6466230584b0ad457c8f6aa1b9c20ba6cd7 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_strided_slice.cc @@ -103,11 +103,14 @@ void StridedSlice(StridedSliceOperator const& op, Array const& input_array, } // anonymous namespace -bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantStridedSlice::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kStridedSlice) { - return false; + return ::tensorflow::Status::OK(); } const StridedSliceOperator* op = @@ -117,28 +120,28 @@ bool ResolveConstantStridedSlice::Run(Model* 
model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes - return false; + return ::tensorflow::Status::OK(); } if (op->start_indices.empty() || op->stop_indices.empty() || op->strides.empty()) { // Attributes have not resolved yet. - return false; + return ::tensorflow::Status::OK(); } const auto& input_array = model->GetArray(op->inputs[0]); if (!input_array.has_shape()) { // Yield until the value shape has been resolved. - return false; + return ::tensorflow::Status::OK(); } if (!IsConstantParameterArray(*model, op->inputs[0])) { // Yield until the value is constant. - return false; + return ::tensorflow::Status::OK(); } CHECK(!output_array.buffer); @@ -164,7 +167,8 @@ bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) { DeleteOpAndArraysIfUnused(model, it->get()); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc index 5cfa1a5582d2b7cd346764bd68f78720c8cca7e3..c5e93c9bad0f67df34e4e51803d7c1eff571c225 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_tile.cc @@ -97,11 +97,14 @@ inline void Tile(const Array& input_array, const Array& multiples_array, } // namespace // Resolves a constant Tile operation. 
-bool ResolveConstantTile::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantTile::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kTile) { - return false; + return ::tensorflow::Status::OK(); } const auto* op = static_cast(base_op); @@ -110,17 +113,17 @@ bool ResolveConstantTile::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes. - return false; + return ::tensorflow::Status::OK(); } // We require constant inputs. if (!IsConstantParameterArray(*model, op->inputs[0]) || !IsConstantParameterArray(*model, op->inputs[1])) { - return false; + return ::tensorflow::Status::OK(); } const Array& input_array = model->GetArray(op->inputs[0]); const Array& multiples_array = model->GetArray(op->inputs[1]); @@ -159,7 +162,8 @@ bool ResolveConstantTile::Run(Model* model, std::size_t op_index) { // Erase the operator. 
model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc index fe15dfa06f4e4a9407121d6fcc63ac9587fa07cb..b759c4d6dda8739460045f696d935aa8d56a553d 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_transpose.cc @@ -101,11 +101,14 @@ void Transpose(Model* model, const Array& input_array, } // namespace -bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantTranspose::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; const auto* base_op = it->get(); if (base_op->type != OperatorType::kTranspose) { - return false; + return ::tensorflow::Status::OK(); } const auto* op = static_cast(base_op); @@ -114,17 +117,17 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) { auto& output_array = model->GetArray(op->outputs[0]); if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes. - return false; + return ::tensorflow::Status::OK(); } if (!output_array.has_shape()) { // Yield until the output shape has been set by PropagateFixedShapes. - return false; + return ::tensorflow::Status::OK(); } // We require constant inputs. 
if (!IsConstantParameterArray(*model, op->inputs[0]) || !IsConstantParameterArray(*model, op->inputs[1])) { - return false; + return ::tensorflow::Status::OK(); } const Array& input_array = model->GetArray(op->inputs[0]); @@ -132,7 +135,7 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) { if (op->perm.empty()) { // Yield until perm has been populated by ResolveTransposeAttributes. - return false; + return ::tensorflow::Status::OK(); } // We currently only support 1-4 dimensions. @@ -174,7 +177,8 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) { // Erase the operator. model->operators.erase(it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc index c698a9567af17938aa8bf827a1941ac14b068053..3034c1b1eb0fcf2e3e996ac6fa9b1008720e818e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc @@ -27,6 +27,73 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" namespace toco { +namespace { + +// Using the function reducer, reduce input along all axes in axes. +// Put the reduced data in output, which should already be appropriately sized. +// check_output_shape is set to what this code computes the final shape +// to be, so it can be cross checked with the shape computation logic. +void ReduceGeneric(bool keep_dims, const std::vector& axes, + const Shape& input_shape, const std::vector& input, + Shape* check_output_shape, std::vector* output, + const std::function& reducer) { + if (!IsNonEmpty(input_shape)) { + // Zero-dimensions will break the NextIndices() logic, so just early out if + // we have an empty shape.
+ return; + } + + // Set up output_shape to be the same length as input_shape, with + // appropriate dimensions squashed to 1. If keep_dims is false, we'll strip + // out the one dimensions at the end, but it's convenient to leave them for + // now. We recompute the shape because we need the output shape to have + // 1-dims in all the squashed dimensions; the shape from shape computation may + // remove those squashed dimensions, depending on the options used. + Shape output_shape = input_shape; + + // Reduction mask will be elementwise multiplied against the input + // indices to figure out the output index for the element. + std::vector reduction_mask(input_shape.dimensions_count(), 1); + for (int axis : axes) { + CHECK_GE(axis, 0); + CHECK_LT(axis, input_shape.dimensions_count()); + reduction_mask[axis] = 0; + output_shape.mutable_dims()->at(axis) = 1; + } + + std::vector output_indices(input_shape.dimensions_count()); + for (int input_offset = 0; input_offset < input.size(); ++input_offset) { + std::vector input_indices = ReverseOffset(input_shape, input_offset); + // Calculate the output location by squashing input indices to 0 + // in reduced axes. + for (int i = 0; i < input_shape.dimensions_count(); ++i) { + output_indices[i] = input_indices[i] * reduction_mask[i]; + } + int output_offset = Offset(output_shape, output_indices); + if (input_indices == output_indices) { + // Base element for the reduced axes + output->at(output_offset) = input.at(input_offset); + } else { + // Reduce with existing element. + output->at(output_offset) = + reducer(output->at(output_offset), input.at(input_offset)); + } + } + + if (!keep_dims) { + // Strip out the dims from output_shape. 
+ std::vector new_dims; + for (int i = 0; i < output_shape.dimensions_count(); ++i) { + if (reduction_mask[i]) { + new_dims.push_back(output_shape.dims(i)); + } + } + output_shape.mutable_dims()->swap(new_dims); + } + *check_output_shape = output_shape; +} + +} // namespace bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) { auto& output_array = model->GetArray(op.outputs[0]); @@ -45,7 +112,10 @@ bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) { return true; } -bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveConstantUnaryOperator::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto unary_it = model->operators.begin() + op_index; const auto* unary_op = unary_it->get(); // Test for unary ops of types that we know how to resolve. @@ -66,28 +136,28 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { case OperatorType::kRelu: break; default: - return false; + return ::tensorflow::Status::OK(); } // Check if the input is a constant parameter. if (!IsConstantParameterArray(*model, unary_op->inputs[0])) { - return false; + return ::tensorflow::Status::OK(); } // if the unary op involves a tensor required by a rnn state, ignore it for (const auto& rnn_state : model->flags.rnn_states()) { if (unary_op->inputs[0] == rnn_state.back_edge_source_array()) { - return false; + return ::tensorflow::Status::OK(); } if (unary_op->inputs[0] == rnn_state.state_array()) { - return false; + return ::tensorflow::Status::OK(); } } auto& output_array = model->GetArray(unary_op->outputs[0]); if (!output_array.has_shape()) { // Yield until the output array dims have been resolved. - return false; + return ::tensorflow::Status::OK(); } // At the moment we don't want to care about fused activation functions. 
@@ -99,7 +169,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { "Not resolving constant %s " " because it has a fused activation function", LogName(*unary_op)); - return false; + return ::tensorflow::Status::OK(); } // The min-max is only copied for ops that copy data without arithmetic. @@ -120,7 +190,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { "Not resolving constant %s because we currently only support casting " "to float", LogName(*unary_op)); - return false; + return ::tensorflow::Status::OK(); } if (cast_op->src_data_type != input_array.buffer->type) { AddMessageF( @@ -130,7 +200,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { } } else { if (input_array.buffer->type != ArrayDataType::kFloat) { - return false; + return ::tensorflow::Status::OK(); } input_float_data = &(input_array.GetBuffer().data); } @@ -172,31 +242,23 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { CHECK_EQ(unary_op->inputs.size(), 2) << "Sum needs 2 inputs"; if (!IsConstantParameterArray(*model, unary_op->inputs[1])) { AddMessageF("Axis input is non-constant"); - return false; + return ::tensorflow::Status::OK(); } auto& axis_array = model->GetArray(unary_op->inputs[1]); CHECK(axis_array.data_type == ArrayDataType::kInt32); - int axis = axis_array.GetBuffer().data[0]; - CHECK_LT(axis, input_shape.dimensions_count()) << "Axis out of bounds"; - // We currently only handle reduction on axis 0. - CHECK_EQ(axis, 0) << "Only reduction along axis 0 is supported"; - // We currently only handle 1-D and 2-D input tensors. - CHECK_LE(input_shape.dimensions_count(), 2) << "Rank >2 not yet supported"; // We only support keep_dims=true; shape prop will need to change otherwise. 
auto sum_op = static_cast(unary_op); - CHECK(sum_op->keep_dims) << "Only keep_dims=true is supported"; + Shape check_output_shape; - std::vector indices(input_shape.dimensions_count()); - for (int i = 0; i < input_shape.dims(1); ++i) { - indices[1] = i; - float sum = 0.f; - for (int j = 0; j < input_shape.dims(0); ++j) { - indices[0] = j; - sum += (*input_float_data)[Offset(input_shape, indices)]; - } - output_float_data[i] = sum; - } + ReduceGeneric( + sum_op->keep_dims, axis_array.GetBuffer().data, + input_shape, *input_float_data, &check_output_shape, &output_float_data, + [](float existing, float current) -> float { + return existing + current; + }); + CHECK(check_output_shape == output_shape) + << "Shape propagation output shape doesn't match output shape from op"; } else if (unary_op->type == OperatorType::kReduceMin) { // At the moment only full reduction across all dimensions is supported. // TODO(starka): Output should not be padded. @@ -277,7 +339,7 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { default: LOG(FATAL) << "Unsupported activation function " << LogName(*unary_op); - return false; + return ::tensorflow::Status::OK(); } output_float_data[i] = new_value; } @@ -292,7 +354,8 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { AddMessageF("Resolved constant %s to the equivalent constant array", LogName(*unary_op)); model->operators.erase(unary_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc index 0dda1fd0b35fb0cdc3c605360df5126c52c05403..eed971c1d50293140c367f1848b5f58c5e7163d9 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc +++ 
b/tensorflow/contrib/lite/toco/graph_transformations/resolve_fake_quant_args_from_vars.cc @@ -25,17 +25,20 @@ limitations under the License. namespace toco { -bool ResolveFakeQuantArgsFromVars::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveFakeQuantArgsFromVars::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto fakequant_it = model->operators.begin() + op_index; auto* fakequant_base_op = fakequant_it->get(); if (fakequant_base_op->type != OperatorType::kFakeQuant) { - return false; + return ::tensorflow::Status::OK(); } auto* fakequant_op = static_cast(fakequant_base_op); if (fakequant_op->minmax) { // Already resolved. - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(fakequant_op->inputs.size(), 3); @@ -43,7 +46,7 @@ bool ResolveFakeQuantArgsFromVars::Run(Model* model, std::size_t op_index) { // resolved to constant arrays. for (int i = 1; i <= 2; i++) { if (!IsConstantParameterArray(*model, fakequant_op->inputs[i])) { - return false; + return ::tensorflow::Status::OK(); } } @@ -74,7 +77,8 @@ bool ResolveFakeQuantArgsFromVars::Run(Model* model, std::size_t op_index) { DeleteArrayIfUsedOnce(fakequant_op->inputs[i], model); } fakequant_op->inputs.resize(1); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc index ce825c91af428c866ca9f83b765399f209606af9..69209b8dec7dd0bfe552f70a426b92c6386953e8 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_gather_attributes.cc @@ -24,20 +24,25 @@ limitations under the License. 
namespace toco { -bool ResolveGatherAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveGatherAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto* gather_op = model->operators[op_index].get(); - if (gather_op->type != OperatorType::kGather) return false; + if (gather_op->type != OperatorType::kGather) + return ::tensorflow::Status::OK(); auto* op = static_cast(gather_op); if (op->axis) { // Attributes already resolved - return false; + return ::tensorflow::Status::OK(); } - if (op->inputs.size() != 3) return false; - if (!IsConstantParameterArray(*model, op->inputs[2])) return false; + if (op->inputs.size() != 3) return ::tensorflow::Status::OK(); + if (!IsConstantParameterArray(*model, op->inputs[2])) + return ::tensorflow::Status::OK(); const auto& indices_array = model->GetArray(op->inputs[2]); - if (!indices_array.has_shape()) return false; + if (!indices_array.has_shape()) return ::tensorflow::Status::OK(); const auto& axis_data = indices_array.GetBuffer().data; CHECK_EQ(axis_data.size(), 1) << "Multidimensional gather not supported on " << LogName(*op); @@ -47,7 +52,8 @@ bool ResolveGatherAttributes::Run(Model* model, std::size_t op_index) { DeleteArrayIfUsedOnce(op->inputs[2], model); op->inputs.resize(2); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc index b2b2ea151bc936d8463d4e4598de5f0d1342edfe..ac94f453213068e33a7abe3de1d47a774245194e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_multiply_by_zero.cc @@ -51,27 +51,30 @@ void FillArrayWithZeros(Array* array) { // Removes a multiplication by array of constant zeros by making the output // array an array 
of constant zeros and removing the input arrays if they are no // longer needed. -bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveMultiplyByZero::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto mul_it = model->operators.begin() + op_index; auto* mul_op = mul_it->get(); if (mul_op->type != OperatorType::kMul) { - return false; + return ::tensorflow::Status::OK(); } const auto& output_array_name = mul_op->outputs[0]; auto& output_array = model->GetArray(output_array_name); if (!IsDiscardableArray(*model, output_array_name)) { - return false; + return ::tensorflow::Status::OK(); } if (output_array.data_type == ArrayDataType::kNone) { // Yield until the output type has been set by PropagateArrayDataTypes - return false; + return ::tensorflow::Status::OK(); } // Yield if the output shape is not known yet. if (!output_array.has_shape()) { - return false; + return ::tensorflow::Status::OK(); } // This transformation only handles the case where one operand is all 0's and @@ -83,12 +86,12 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { }; if (!is_input_constant[0] && !is_input_constant[1]) { // Neither input is constant, so nothing we can resolve here. - return false; + return ::tensorflow::Status::OK(); } if (is_input_constant[0] && is_input_constant[1]) { // Both inputs are constants. That's a job for constants propagation, not // for us to handle here. - return false; + return ::tensorflow::Status::OK(); } const int index_of_constant_input = is_input_constant[0] ? 0 : 1; const int index_of_variable_input = is_input_constant[0] ? 
1 : 0; @@ -105,7 +108,7 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { constant_input_array.GetBuffer().data; if (!AreAllBufferElementsZero>( constant_input_data)) { - return false; + return ::tensorflow::Status::OK(); } FillArrayWithZeros(&output_array); } break; @@ -114,7 +117,7 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { constant_input_array.GetBuffer().data; if (!AreAllBufferElementsZero>( constant_input_data)) { - return false; + return ::tensorflow::Status::OK(); } FillArrayWithZeros(&output_array); } break; @@ -123,7 +126,7 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { constant_input_array.GetBuffer().data; if (!AreAllBufferElementsZero>( constant_input_data)) { - return false; + return ::tensorflow::Status::OK(); } FillArrayWithZeros(&output_array); } break; @@ -132,14 +135,14 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { constant_input_array.GetBuffer().data; if (!AreAllBufferElementsZero>( constant_input_data)) { - return false; + return ::tensorflow::Status::OK(); } FillArrayWithZeros(&output_array); } break; default: AddMessageF( "Cannot resolve multiply by 0 because of unsupported data type\n"); - return false; + return ::tensorflow::Status::OK(); } // Erase input arrays to the multiply if no longer used @@ -149,7 +152,8 @@ bool ResolveMultiplyByZero::Run(Model* model, std::size_t op_index) { // Erase the multiply operator. 
model->operators.erase(mul_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc index 8a8e723cf7b2d77ec199e3817464a068bf85afdd..adc87753bc71cc192eb71482bf7165b65fa417db 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_pad_attributes.cc @@ -24,19 +24,23 @@ limitations under the License. namespace toco { -bool ResolvePadAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolvePadAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto pad_it = model->operators.begin() + op_index; auto* pad_op = pad_it->get(); - if (pad_op->type != OperatorType::kPad) return false; + if (pad_op->type != OperatorType::kPad) return ::tensorflow::Status::OK(); auto* op = static_cast(pad_op); - if (!op->left_padding.empty()) return false; + if (!op->left_padding.empty()) return ::tensorflow::Status::OK(); CHECK_EQ(op->inputs.size(), 2); - if (!IsConstantParameterArray(*model, op->inputs[1])) return false; + if (!IsConstantParameterArray(*model, op->inputs[1])) + return ::tensorflow::Status::OK(); const auto& array = model->GetArray(op->inputs[1]); - if (!array.has_shape()) return false; + if (!array.has_shape()) return ::tensorflow::Status::OK(); const std::vector& dims = array.shape().dims(); CHECK_EQ(dims.size(), 2); @@ -50,6 +54,7 @@ bool ResolvePadAttributes::Run(Model* model, std::size_t op_index) { // TODO(dkalenichenko): Delete the extra input? 
- return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc index ebb023e34223a57a2ad5708662d9c443949fcd0a..1f0f17a37a99c6b721daa75321fa7316f102cd83 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_padv2_attributes.cc @@ -24,19 +24,23 @@ limitations under the License. namespace toco { -bool ResolvePadV2Attributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolvePadV2Attributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto pad_it = model->operators.begin() + op_index; auto* pad_op = pad_it->get(); - if (pad_op->type != OperatorType::kPadV2) return false; + if (pad_op->type != OperatorType::kPadV2) return ::tensorflow::Status::OK(); auto* op = static_cast(pad_op); - if (!op->left_padding.empty()) return false; + if (!op->left_padding.empty()) return ::tensorflow::Status::OK(); CHECK_EQ(op->inputs.size(), 3); - if (!IsConstantParameterArray(*model, op->inputs[1])) return false; + if (!IsConstantParameterArray(*model, op->inputs[1])) + return ::tensorflow::Status::OK(); const auto& array = model->GetArray(op->inputs[1]); - if (!array.has_shape()) return false; + if (!array.has_shape()) return ::tensorflow::Status::OK(); const std::vector& dims = array.shape().dims(); CHECK_EQ(dims.size(), 2); @@ -50,6 +54,7 @@ bool ResolvePadV2Attributes::Run(Model* model, std::size_t op_index) { // TODO(dkalenichenko): Delete the extra input? 
- return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc index 73198ac7c032fc67d8ed85259bc779c5c06e1e16..c3246ab90fc49290181f7055cf51337c61630ff3 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reduce_attributes.cc @@ -39,23 +39,37 @@ bool ResolveAttributes(Model* model, T* op) { return true; } -bool ResolveReduceAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveReduceAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; Operator* op = model->operators[op_index].get(); switch (op->type) { case OperatorType::kMean: - return ResolveAttributes(model, static_cast(op)); + *modified = ResolveAttributes(model, static_cast(op)); + return ::tensorflow::Status::OK(); case OperatorType::kSum: - return ResolveAttributes(model, static_cast(op)); + *modified = + ResolveAttributes(model, static_cast(op)); + return ::tensorflow::Status::OK(); case OperatorType::kReduceProd: - return ResolveAttributes(model, static_cast(op)); + *modified = + ResolveAttributes(model, static_cast(op)); + return ::tensorflow::Status::OK(); case OperatorType::kReduceMin: - return ResolveAttributes(model, static_cast(op)); + *modified = + ResolveAttributes(model, static_cast(op)); + return ::tensorflow::Status::OK(); case OperatorType::kReduceMax: - return ResolveAttributes(model, static_cast(op)); + *modified = + ResolveAttributes(model, static_cast(op)); + return ::tensorflow::Status::OK(); case OperatorType::kAny: - return ResolveAttributes(model, static_cast(op)); + *modified = + ResolveAttributes(model, static_cast(op)); + return ::tensorflow::Status::OK(); default: - return false; + return 
::tensorflow::Status::OK(); } } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc index 8e150db6fa6e7e1d2a8d92babc0d0736f25b535d..ee5c4810e61326389e402b22f57622720c9fa7dd 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reorder_axes.cc @@ -78,11 +78,13 @@ void ReorderAxes(AxesOrder input_axes_order, AxesOrder output_axes_order, } } -bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveReorderAxes::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; auto it = model->operators.begin() + op_index; auto* op = it->get(); if (op->type != OperatorType::kReorderAxes) { - return false; + return ::tensorflow::Status::OK(); } auto* reorder_op = static_cast(op); @@ -93,11 +95,11 @@ bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) { auto& input_array = model->GetArray(input_array_name); auto& output_array = model->GetArray(output_array_name); if (!input_array.buffer) { - return false; + return ::tensorflow::Status::OK(); } // Yield until output dims have been resolved. 
if (!output_array.has_shape()) { - return false; + return ::tensorflow::Status::OK(); } // Reorder the input array dims and buffer data if (input_array.buffer->type == ArrayDataType::kFloat) { @@ -120,7 +122,8 @@ bool ResolveReorderAxes::Run(Model* model, std::size_t op_index) { DeleteOpAndArraysIfUnused(model, op); RenameArray(model, output_array_name, input_array_name); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc index b615c9a545695e5d14fa5809e0c38a770f23ea24..7b7a59264ff74f02272d02d001106e4a29d43b6e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_reshape_attributes.cc @@ -25,25 +25,29 @@ limitations under the License. namespace toco { -bool ResolveReshapeAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveReshapeAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto reshape_it = model->operators.begin() + op_index; auto* reshape_op = reshape_it->get(); if (reshape_op->type != OperatorType::kReshape) { - return false; + return ::tensorflow::Status::OK(); } auto* op = static_cast(reshape_op); - if (!op->shape.empty()) return false; + if (!op->shape.empty()) return ::tensorflow::Status::OK(); if (IsConstantParameterArray(*model, reshape_op->inputs[1])) { const auto& constant_input_array = model->GetArray(reshape_op->inputs[1]); op->shape = constant_input_array.GetBuffer().data; } - if (op->shape.empty()) return false; + if (op->shape.empty()) return ::tensorflow::Status::OK(); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git 
a/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc index e760d08e5a6c2f56db6b11fee922b701d33dd1a0..5a838168de73821d19c4299ad289f9f50bd225f1 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_slice_attributes.cc @@ -24,29 +24,35 @@ limitations under the License. namespace toco { -bool ResolveSliceAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveSliceAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto slice_it = model->operators.begin() + op_index; auto* slice_op = slice_it->get(); - if (slice_op->type != OperatorType::kSlice) return false; + if (slice_op->type != OperatorType::kSlice) return ::tensorflow::Status::OK(); auto* op = static_cast(slice_op); - if (!op->begin.empty()) return false; + if (!op->begin.empty()) return ::tensorflow::Status::OK(); CHECK_EQ(op->inputs.size(), 3); - if (!IsConstantParameterArray(*model, op->inputs[1])) return false; - if (!IsConstantParameterArray(*model, op->inputs[2])) return false; + if (!IsConstantParameterArray(*model, op->inputs[1])) + return ::tensorflow::Status::OK(); + if (!IsConstantParameterArray(*model, op->inputs[2])) + return ::tensorflow::Status::OK(); const auto& begin_array = model->GetArray(op->inputs[1]); - if (!begin_array.has_shape()) return false; + if (!begin_array.has_shape()) return ::tensorflow::Status::OK(); const auto& size_array = model->GetArray(op->inputs[2]); - if (!size_array.has_shape()) return false; + if (!size_array.has_shape()) return ::tensorflow::Status::OK(); op->begin = begin_array.GetBuffer().data; op->size = size_array.GetBuffer().data; // TODO(dkalenichenko): Delete the extra inputs? 
- return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc index fab50bec1fc5ec50cecba53845457931ed59c0b8..3804145c4f8cef7808306f565eba310aca5af811 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_space_to_batch_nd_attributes.cc @@ -24,16 +24,20 @@ limitations under the License. namespace toco { -bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveSpaceToBatchNDAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto op_it = model->operators.begin() + op_index; - if (op_it->get()->type != OperatorType::kSpaceToBatchND) return false; + if (op_it->get()->type != OperatorType::kSpaceToBatchND) + return ::tensorflow::Status::OK(); auto* op = static_cast(op_it->get()); // The attributes are resolved only when the 3 attributes (block_shape, // before_paddings, after_paddings) are all constant. if (!op->block_shape.empty()) { - return false; + return ::tensorflow::Status::OK(); } const int block_shape_index = 1; @@ -42,16 +46,16 @@ bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) { CHECK_EQ(op->inputs.size(), 3); if (!IsConstantParameterArray(*model, op->inputs[block_shape_index]) || !IsConstantParameterArray(*model, op->inputs[paddings_index])) - return false; + return ::tensorflow::Status::OK(); // Handle paddings. 
const auto& paddings_array = model->GetArray(op->inputs[paddings_index]); - if (!paddings_array.has_shape()) return false; + if (!paddings_array.has_shape()) return ::tensorflow::Status::OK(); const std::vector& paddings_dims = paddings_array.shape().dims(); if (paddings_dims.size() != 2) { // Code only handles padding of 2 dimensions. Perhaps another transformation // will delete this op. - return false; + return ::tensorflow::Status::OK(); } const std::vector& paddings_buffer = paddings_array.GetBuffer().data; @@ -63,7 +67,7 @@ bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) { // Handle block_shape. const auto& block_shape_array = model->GetArray(op->inputs[block_shape_index]); - if (!block_shape_array.has_shape()) return false; + if (!block_shape_array.has_shape()) return ::tensorflow::Status::OK(); const std::vector& block_shape_dims = block_shape_array.shape().dims(); CHECK_EQ(block_shape_dims.size(), 1); const std::vector& block_shape_buffer = @@ -72,7 +76,8 @@ bool ResolveSpaceToBatchNDAttributes::Run(Model* model, std::size_t op_index) { op->block_shape.push_back(block_shape_buffer[i]); } - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc index e8bb85704e1c750300079681b5a12f6a488b6b48..c601b0774e6274ecdc9b97f61c47b83306044a3e 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_squeeze_attributes.cc @@ -25,10 +25,13 @@ limitations under the License. 
namespace toco { -bool ResolveSqueezeAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveSqueezeAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto* squeeze_op = model->operators[op_index].get(); if (squeeze_op->type != OperatorType::kSqueeze) { - return false; + return ::tensorflow::Status::OK(); } DCHECK_EQ(squeeze_op->inputs.size(), 1); DCHECK_EQ(squeeze_op->outputs.size(), 1); @@ -42,10 +45,11 @@ bool ResolveSqueezeAttributes::Run(Model* model, std::size_t op_index) { "Reshape op", LogName(*squeeze_op)); - return RemoveTrivialPassthroughOp(this, model, op_index); + *modified = RemoveTrivialPassthroughOp(this, model, op_index); + return ::tensorflow::Status::OK(); } } - return false; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc index 65132d7d1ef0626e0ad41a88b8e7999a1c1cf684..f54f5b42a1f4c04ef46d18de3a36b70f54978433 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_strided_slice_attributes.cc @@ -37,40 +37,47 @@ int PadAttributeArray(Array* attribute_array, std::vector pad_values, return mask; } -bool ResolveStridedSliceAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveStridedSliceAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto slice_it = model->operators.begin() + op_index; auto* slice_op = slice_it->get(); - if (slice_op->type != OperatorType::kStridedSlice) return false; + if (slice_op->type != OperatorType::kStridedSlice) + return ::tensorflow::Status::OK(); auto* op = static_cast(slice_op); if (!op->start_indices.empty()) { // We have already resolved these attributes - return 
false; + return ::tensorflow::Status::OK(); } CHECK_EQ(op->inputs.size(), 4); const auto& input_array = model->GetArray(op->inputs[0]); if (!input_array.has_shape()) { // We require the dimensionality of the input to pad the indices - return false; + return ::tensorflow::Status::OK(); } auto& start_array = model->GetArray(op->inputs[1]); - if (!start_array.has_shape()) return false; + if (!start_array.has_shape()) return ::tensorflow::Status::OK(); if (toco::RequiredBufferSizeForShape(start_array.shape()) > 4) { // Only 1-4D arrays are supported for now. - return false; + return ::tensorflow::Status::OK(); } auto& stop_array = model->GetArray(op->inputs[2]); - if (!stop_array.has_shape()) return false; + if (!stop_array.has_shape()) return ::tensorflow::Status::OK(); auto& stride_array = model->GetArray(op->inputs[3]); - if (!stride_array.has_shape()) return false; + if (!stride_array.has_shape()) return ::tensorflow::Status::OK(); - if (!IsConstantParameterArray(*model, op->inputs[1])) return false; - if (!IsConstantParameterArray(*model, op->inputs[2])) return false; - if (!IsConstantParameterArray(*model, op->inputs[3])) return false; + if (!IsConstantParameterArray(*model, op->inputs[1])) + return ::tensorflow::Status::OK(); + if (!IsConstantParameterArray(*model, op->inputs[2])) + return ::tensorflow::Status::OK(); + if (!IsConstantParameterArray(*model, op->inputs[3])) + return ::tensorflow::Status::OK(); int num_input_axes = input_array.shape().dimensions_count(); int start_indices_size = start_array.shape().dims(0); @@ -112,6 +119,7 @@ bool ResolveStridedSliceAttributes::Run(Model* model, std::size_t op_index) { op->stop_indices = stop_array.GetBuffer().data; op->strides = stride_array.GetBuffer().data; - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc 
b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc index fa5ee899334bdf2d39a6861b0e0c4548142e9d2a..4927ccd95d34f30e5f06261057a97bb95a7b1b8b 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_concat.cc @@ -25,12 +25,15 @@ limitations under the License. namespace toco { -bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveTensorFlowConcat::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto concat_it = model->operators.begin() + op_index; const auto* tf_concat_op = concat_it->get(); if (tf_concat_op->type != OperatorType::kConcat && tf_concat_op->type != OperatorType::kConcatV2) { - return false; + return ::tensorflow::Status::OK(); } CHECK_GE(tf_concat_op->inputs.size(), 2); @@ -54,7 +57,7 @@ bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) { if (!axis_array.buffer) { AddMessageF("Waiting for the axis of %s to be resolved to a constant", LogName(*tf_concat_op)); - return false; + return ::tensorflow::Status::OK(); } CHECK(axis_array.data_type == ArrayDataType::kInt32); @@ -79,7 +82,8 @@ bool ResolveTensorFlowConcat::Run(Model* model, std::size_t op_index) { } // Remove the TensorFlowConcat op model->operators.erase(concat_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc index 65346c4fe4ab027c5a24740a7cb39be9a38fcc2c..da039da546fc5d1c8455310dc03896e5de5433be 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_matmul.cc @@ -55,10 +55,13 @@ TransposeOperator* 
FindTransposeOpWithInput(const Model& model, } // namespace -bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveTensorFlowMatMul::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; auto matmul_it = model->operators.begin() + op_index; if (matmul_it->get()->type != OperatorType::kMatMul) { - return false; + return ::tensorflow::Status::OK(); } const auto* matmul_op = static_cast(matmul_it->get()); @@ -73,7 +76,7 @@ bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) { "Not replacing %s by a FullyConnected operator, because it has " "the transpose_a attribute", LogName(*matmul_op)); - return false; + return ::tensorflow::Status::OK(); } // Reorder the axes on the second input. TensorFlow uses row-major ordering @@ -198,7 +201,8 @@ bool ResolveTensorFlowMatMul::Run(Model* model, std::size_t op_index) { // erase the MatMul operator model->operators.erase(matmul_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc index 4edffe3d48fd880c0261b34fc407b8e2ac66ccb9..9beea3e937b284a5ac0a7a8fb8de02506e9554d4 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_merge.cc @@ -24,11 +24,14 @@ limitations under the License. 
namespace toco { -bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveTensorFlowMerge::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto merge_it = model->operators.begin() + op_index; const auto* merge_op = merge_it->get(); if (merge_op->type != OperatorType::kMerge) { - return false; + return ::tensorflow::Status::OK(); } // We need to yield until this Merge node has only 1 input, which will mean @@ -37,7 +40,7 @@ bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) { // non-selected inputs, so that at some point there will be only 1 input left. if (merge_op->inputs.size() > 1) { AddMessageF("Waiting for %s to be resolved", LogName(*merge_op)); - return false; + return ::tensorflow::Status::OK(); } // Now that the merge node has 1 input exactly, it is the same as an Identity @@ -57,7 +60,8 @@ bool ResolveTensorFlowMerge::Run(Model* model, std::size_t op_index) { AddMessageF("Removing already-resolved %s", LogName(*merge_op)); model->EraseArray(merge_op->outputs[0]); model->operators.erase(merge_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc index 8bef440afd21572d7014e4f376be3aba2d80127d..e215981b42262fa7a893841a367ae0711c6458f2 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_tensorflow_switch.cc @@ -24,11 +24,14 @@ limitations under the License. 
namespace toco { -bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveTensorFlowSwitch::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto switch_it = model->operators.begin() + op_index; const auto* switch_op = switch_it->get(); if (switch_op->type != OperatorType::kSwitch) { - return false; + return ::tensorflow::Status::OK(); } CHECK_EQ(switch_op->inputs.size(), 2); @@ -40,7 +43,7 @@ bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) { AddMessageF( "Waiting for the boolean predicate of %s to be resolved to a constant", LogName(*switch_op)); - return false; + return ::tensorflow::Status::OK(); } // The predicate should be boolean, and should consist of a single value. @@ -119,7 +122,8 @@ bool ResolveTensorFlowSwitch::Run(Model* model, std::size_t op_index) { // Remove the switch node itself. AddMessageF("Removing already-resolved %s", LogName(*switch_op)); model->operators.erase(switch_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc index a657ee00af66bd431f96c361e12d5213e203b3df..aa7945391c766c27d44d262472a17e24cae210fa 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_transpose_attributes.cc @@ -24,19 +24,24 @@ limitations under the License. 
namespace toco { -bool ResolveTransposeAttributes::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ResolveTransposeAttributes::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto op_it = model->operators.begin() + op_index; - if (op_it->get()->type != OperatorType::kTranspose) return false; + if (op_it->get()->type != OperatorType::kTranspose) + return ::tensorflow::Status::OK(); auto* op = static_cast(op_it->get()); - if (!op->perm.empty()) return false; + if (!op->perm.empty()) return ::tensorflow::Status::OK(); CHECK_EQ(op->inputs.size(), 2); - if (!IsConstantParameterArray(*model, op->inputs[1])) return false; + if (!IsConstantParameterArray(*model, op->inputs[1])) + return ::tensorflow::Status::OK(); // Handling perm. const auto& perm_array = model->GetArray(op->inputs[1]); - if (!perm_array.has_shape()) return false; + if (!perm_array.has_shape()) return ::tensorflow::Status::OK(); const std::vector& perm_dims = perm_array.shape().dims(); CHECK_EQ(perm_dims.size(), 1); @@ -47,7 +52,8 @@ bool ResolveTransposeAttributes::Run(Model* model, std::size_t op_index) { op->perm.push_back(perm_buffer[i]); } - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc b/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc index 22c258cec5fde4144c4b048d5ec60a8604362cbb..e9f24a29ab469587bb765068409906e31e679297 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/shuffle_fc_weights.cc @@ -24,15 +24,17 @@ limitations under the License. 
namespace toco { -bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { +::tensorflow::Status ShuffleFCWeights::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; Operator* op = model->operators[op_index].get(); if (op->type != OperatorType::kFullyConnected) { - return false; + return ::tensorflow::Status::OK(); } FullyConnectedOperator* fc_op = static_cast(op); // Exit if this FC op already has shuffled weights if (fc_op->weights_format != FullyConnectedWeightsFormat::kDefault) { - return false; + return ::tensorflow::Status::OK(); } const Array& input_array = model->GetArray(fc_op->inputs[0]); const string& weights_name = fc_op->inputs[1]; @@ -46,11 +48,11 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { output_array.data_type != ArrayDataType::kInt16 || !input_array.quantization_params || !weights_array.quantization_params || !output_array.quantization_params) { - return false; + return ::tensorflow::Status::OK(); } // Exit if the shapes aren't known if (!input_array.has_shape() || !weights_array.has_shape()) { - return false; + return ::tensorflow::Status::OK(); } // Exit if, based on the known shapes, this FC op is not a GEMV. // The shuffling of FC weights is only useful to enable fast GEMV paths. @@ -64,7 +66,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { "the input shape is not 1D or 2D (possibly with additional inner " "dimensions of size 1)", LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } } if (input_shape.dims(0) != 1 && input_shape.dims(0) != 4) { @@ -73,7 +75,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { "the input shape's leading dimension, i.e. the 'batch size', is not " "equal to 1 or 4", LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } // Exit if the weights shape isn't an integral multiple of the shuffled // block shape, 4x16. 
We don't want to have to write code dealing with @@ -88,7 +90,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { // two. const Shape& weights_shape = weights_array.shape(); if (weights_shape.dimensions_count() != 2) { - return false; + return ::tensorflow::Status::OK(); } const int rows = weights_shape.dims(0); const int cols = weights_shape.dims(1); @@ -97,11 +99,11 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { "Not applying experimental shuffling to the weights of %s because its " "shape isn't a multiple of the shuffling block shape, 4x16", LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } // Exit if the weights aren't already a constant array. if (!weights_array.buffer) { - return false; + return ::tensorflow::Status::OK(); } // Exit if the weights are used by more than one op. if (CountOpsWithInput(*model, weights_name) != 1) { @@ -109,7 +111,7 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { "Not applying experimental shuffling to the weights of %s because that " "array is consumed by other operators", LogName(*op)); - return false; + return ::tensorflow::Status::OK(); } // Compute the shuffled weights auto& weights_data = @@ -152,7 +154,8 @@ bool ShuffleFCWeights::Run(Model* model, std::size_t op_index) { shuffled_input_workspace_array.GetOrCreateQuantizationParams() = input_array.GetQuantizationParams(); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD index acf1e3ede5197e899527f8874831165c7ebbf431..6f1be298caaf110f7ef7113bfeb930c96b0ec9de 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/BUILD @@ -30,3 +30,16 @@ tf_cc_test( "@com_google_googletest//:gtest_main", ], ) + +tf_cc_test( + name = 
"resolve_constant_unary_test", + srcs = ["resolve_constant_unary_test.cc"], + tags = ["no_oss"], + deps = [ + "//tensorflow/contrib/lite/toco:graph_transformations", + "//tensorflow/contrib/lite/toco:model", + "//tensorflow/contrib/lite/toco:tooling_util", + "@com_google_absl//absl/memory", + "@com_google_googletest//:gtest_main", + ], +) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc index 66cfed4ac26969729d1881f11ba6ae74d9817fb5..e2a6f12481c336d85be938fead0f3173b9beeebe 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_concatenation_test.cc @@ -166,7 +166,10 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis0) { GraphTransformationsSet graph_transformation_set; graph_transformation_set.Add(new toco::ResolveConstantConcatenation); EXPECT_THAT(model.GetArrayMap().size(), 5); - (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0); + bool modified; + ASSERT_TRUE((*graph_transformation_set.begin()) + ->Run(&model, /*op_index=*/0, &modified) + .ok()); EXPECT_THAT(model.GetArrayMap().size(), 1); auto& concatenated_array = (*model.GetArrayMap().begin()).second; @@ -185,7 +188,10 @@ TEST_F(ResolveConstantConcatenationTest, ConcatAtAxis1) { GraphTransformationsSet graph_transformation_set; graph_transformation_set.Add(new toco::ResolveConstantConcatenation); EXPECT_THAT(model.GetArrayMap().size(), 5); - (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0); + bool modified; + ASSERT_TRUE((*graph_transformation_set.begin()) + ->Run(&model, /*op_index=*/0, &modified) + .ok()); EXPECT_THAT(model.GetArrayMap().size(), 1); auto& concatenated_array = (*model.GetArrayMap().begin()).second; @@ -204,7 +210,10 @@ TEST_F(ResolveConstantConcatenationTest, 
ConcatAtAxis2) { GraphTransformationsSet graph_transformation_set; graph_transformation_set.Add(new toco::ResolveConstantConcatenation); EXPECT_THAT(model.GetArrayMap().size(), 5); - (*graph_transformation_set.begin())->Run(&model, /*op_index=*/0); + bool modified; + ASSERT_TRUE((*graph_transformation_set.begin()) + ->Run(&model, /*op_index=*/0, &modified) + .ok()); EXPECT_THAT(model.GetArrayMap().size(), 1); auto& concatenated_array = (*model.GetArrayMap().begin()).second; diff --git a/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..57d85a0435179f84239d0142c7c1c45a7f3187e2 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/tests/resolve_constant_unary_test.cc @@ -0,0 +1,141 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include + +#include +#include "absl/memory/memory.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" + +namespace toco { + +namespace { + +void RunResolveSum(const std::vector& input, + const std::vector& input_shape, + const std::vector& axis, + const std::vector& output_shape, + const std::vector& expected_output) { + Model model; + Array& input0 = model.GetOrCreateArray("input0"); + Array& input1 = model.GetOrCreateArray("input1"); + Array& output = model.GetOrCreateArray("output"); + + *input0.mutable_shape()->mutable_dims() = input_shape; + input0.data_type = ArrayDataType::kFloat; + input0.GetMutableBuffer().data = input; + + *input1.mutable_shape()->mutable_dims() = {static_cast(axis.size())}; + input1.GetMutableBuffer().data = axis; + input1.data_type = ArrayDataType::kInt32; + + *output.mutable_shape()->mutable_dims() = output_shape; + + auto sum_op = absl::make_unique(); + sum_op->keep_dims = true; + sum_op->inputs = {"input0", "input1"}; + sum_op->outputs = {"output"}; + model.operators.push_back(std::move(sum_op)); + bool modified; + ASSERT_TRUE(ResolveConstantUnaryOperator().Run(&model, 0, &modified).ok()); + EXPECT_EQ(model.GetArray("output").GetBuffer().data, + expected_output); + EXPECT_EQ(model.GetArray("output").shape().dims(), output_shape); +} + +// Reduce a 2d array across axis 0 +TEST(ResolveConstantUnary, ResolveSumAxis0_2D) { + // clang-format off + RunResolveSum( + // Input data + {3, 1, 4, 1, + 5, 9, 2, 6, + 5, 3, 5, 8}, + + // Input shape + {3, 4}, + + // Axes + {0}, + + // Expected output shape, + {1, 4}, + + // Expected output + {13, 13, 11, 15}); + // clang-format on +} + +// Reduce a 2d array across axis 1 +TEST(ResolveConstantUnary, ResolveSumAxis1_2D) { + // clang-format off + RunResolveSum( + // 
Input data + {3, 1, 4, 1, + 5, 9, 2, 6, + 5, 3, 5, 8}, + + // Input shape + {3, 4}, + + // Axes + {1}, + + // Expected output shape, + {3, 1}, + + // Expected output + {9, 22, 21}); + // clang-format on +} + +// Reduce a 3d tensor across axes 0 and 2. +TEST(ResolveConstantUnary, ResolveSumAxis0_2_3D) { + // clang-format off + RunResolveSum( + // Input data + { 0, 1, 2, + 3, 10, 11, + 12, 13, 20, + 21, 22, 23, + + 100, 101, 102, + 103, 110, 111, + 112, 113, 120, + 121, 122, 123, + + 200, 201, 202, + 203, 210, 211, + 212, 213, 220, + 221, 222, 223 }, + + // Input shape + {3, 4, 3}, + + // Axes + {0, 2}, + + // Expected output shape, + {1, 4, 1}, + + // Expected output, generated using octave. + { 909, 972, 1035, 1098}); + // clang-format on +} + +} // namespace +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc index 69bad2fa89cb89cd74e3a4bca98da906a322a670..4ada5c3fd072609d963cfe85c960e917720aa0cb 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/unfuse_activation_functions.cc @@ -25,13 +25,16 @@ limitations under the License. namespace toco { -bool UnfuseActivationFunctions::Run(Model* model, std::size_t op_index) { +::tensorflow::Status UnfuseActivationFunctions::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; const auto it = model->operators.begin() + op_index; auto* op = it->get(); // If a conv operation has an im2col array, yield: it should be dropped first. 
if ((op->type == OperatorType::kConv) && (op->outputs.size() == 2)) { - return false; + return ::tensorflow::Status::OK(); } Operator* ac_op = nullptr; @@ -46,7 +49,7 @@ bool UnfuseActivationFunctions::Run(Model* model, std::size_t op_index) { ac_op = new Relu1Operator; break; default: - return false; + return ::tensorflow::Status::OK(); } // At this point we know that the op has a fused activation function. At the @@ -74,7 +77,8 @@ bool UnfuseActivationFunctions::Run(Model* model, std::size_t op_index) { ac_op->inputs = {tmp_array_name}; op->outputs = {tmp_array_name}; - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc index dd9e26e68bd7e6d5cb751fdbf705b861c3f2f188..e19527968d67f963ccd185f243a034a46cdc9fa7 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc @@ -22,7 +22,10 @@ limitations under the License. namespace toco { -bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { +::tensorflow::Status UnpartitionEmbeddingLookup::Run(Model* model, + std::size_t op_index, + bool* modified) { + *modified = false; // Collapses a partitioned tf.nn.embedding_lookup back into a single Gather. // https://www.tensorflow.org/api_docs/python/tf/nn/embedding_lookup // This transform attempts to identify the len(params) > 1 case and collapse @@ -47,7 +50,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { // First look for the final DynamicStitch. 
auto op_it = model->operators.begin() + op_index; if (op_it->get()->type != OperatorType::kDynamicStitch) { - return false; + return ::tensorflow::Status::OK(); } auto* stitch_op = static_cast(op_it->get()); @@ -72,7 +75,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { "Skipping because indices input %s into " "%s is unexpected", LogName(*op), LogName(*stitch_op)); - return false; + return ::tensorflow::Status::OK(); } if (!indices_partition_op) { indices_partition_op = static_cast(op); @@ -83,7 +86,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { "Skipping because indices input %s into " "%s is from a different source op than others", LogName(*op), LogName(*stitch_op)); - return false; + return ::tensorflow::Status::OK(); } } } @@ -92,12 +95,12 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { // The data for the indices must be a constant range of the array shape. if (!IsConstantParameterArray(*model, indices_partition_op->inputs[0])) { AddMessageF("Skipping because indices partition data is non-constant"); - return false; + return ::tensorflow::Status::OK(); } auto& indices_data_array = model->GetArray(indices_partition_op->inputs[0]); if (indices_data_array.data_type == ArrayDataType::kNone) { // Yield until data types are propagated. 
- return false; + return ::tensorflow::Status::OK(); } CHECK(indices_data_array.data_type == ArrayDataType::kInt32) << "Indices partition inputs must be int32"; @@ -117,7 +120,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { "Skipping because data input %s into %s " "is unexpected", LogName(*op), LogName(*stitch_op)); - return false; + return ::tensorflow::Status::OK(); } gather_ops.push_back(static_cast(op)); } @@ -132,7 +135,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { "Skipping because data input %s into " "%s is unexpected", LogName(*op), LogName(*gather_op)); - return false; + return ::tensorflow::Status::OK(); } if (!data_partition_op) { data_partition_op = static_cast(op); @@ -143,7 +146,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { "Skipping because data input %s into " "%s is from a different source op than others", LogName(*op), LogName(*gather_op)); - return false; + return ::tensorflow::Status::OK(); } } } @@ -236,7 +239,8 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { DeleteOpAndArraysIfUnused(model, indices_partition_op); DeleteOpAndArraysIfUnused(model, data_partition_op); DeleteOpAndArraysIfUnused(model, stitch_op); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc b/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc index fedf4441e2424e9c26c5c1c8a6f07a406c0d937b..5ff39aa313b27968f2988f792f7bef0697b9c7fc 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/unroll_batch_matmul.cc @@ -36,10 +36,12 @@ namespace toco { // slice_c = tf.matmul(slice_a, slice_b) // result_slices[bat] = slice_c // result = tf.stack(result_slices) -bool UnrollBatchMatMul::Run(Model* model, std::size_t 
op_index) { +::tensorflow::Status UnrollBatchMatMul::Run(Model* model, std::size_t op_index, + bool* modified) { + *modified = false; auto batch_op_it = model->operators.begin() + op_index; if (batch_op_it->get()->type != OperatorType::kBatchMatMul) { - return false; + return ::tensorflow::Status::OK(); } const auto* batch_op = static_cast(batch_op_it->get()); @@ -47,7 +49,8 @@ bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) { // We must have the shape of at least one input to know our batch size. const auto& input_array_a = model->GetArray(batch_op->inputs[0]); const auto& input_array_b = model->GetArray(batch_op->inputs[1]); - if (!input_array_a.has_shape() || !input_array_b.has_shape()) return false; + if (!input_array_a.has_shape() || !input_array_b.has_shape()) + return ::tensorflow::Status::OK(); // We only support the rank 3 case. If you are batching on rank > 3 you'll // have to figure that out. @@ -66,7 +69,8 @@ bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) { batch_op_it = matmul_op_it + 1; CHECK_EQ(batch_op_it->get(), batch_op); model->operators.erase(batch_op_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } CHECK_EQ(input_array_a.shape().dimensions_count(), 3) << "Input arrays must have rank 3"; @@ -167,7 +171,8 @@ bool UnrollBatchMatMul::Run(Model* model, std::size_t op_index) { CHECK(batch_op_it != model->operators.end()); CHECK(batch_op_it->get() == batch_op); model->operators.erase(batch_op_it); - return true; + *modified = true; + return ::tensorflow::Status::OK(); } } // namespace toco diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 133ef79a34d9a346ac56f576db0934656c6d4a3d..41d02df5f084f2db9a51dd2ddbf954b4a8b9a223 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -43,6 +43,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/public/session_options.h" @@ -1121,13 +1122,25 @@ tensorflow::Status ConvertUnsupportedOperator( op->inputs.push_back(node.input(i)); } - // Parse outputs. - op->outputs.push_back(node.name()); // Implicit :0. + // Parse outputs. Name them after the node's name, plus an ordinal suffix. + // Note that some outputs are to be multiplied by a named attribute. const tensorflow::OpDef* op_def = nullptr; if (tensorflow::OpRegistry::Global()->LookUpOpDef(node.op(), &op_def).ok()) { - for (int i = 1; i < op_def->output_arg_size(); ++i) { - op->outputs.push_back(absl::StrCat(node.name(), ":", i)); + int next_output = 0; + for (int i = 0; i < op_def->output_arg_size(); ++i) { + string multiples = op_def->output_arg(i).number_attr(); + int num_outputs = multiples.empty() ? 1 : GetIntAttr(node, multiples); + for (int j = 0; j < num_outputs; ++j) { + if (next_output == 0) { + op->outputs.push_back(node.name()); // Implicit :0. + } else { + op->outputs.push_back(absl::StrCat(node.name(), ":", next_output)); + } + ++next_output; + } } + } else { + op->outputs.push_back(node.name()); // Implicit :0. 
} // Parse if the op supports quantization @@ -1151,11 +1164,14 @@ tensorflow::Status ConvertUnsupportedOperator( op->output_data_types.push_back(ConvertDataType(output_type)); } else if (op_def != nullptr) { for (const auto& output_arg : op_def->output_arg()) { - if (HasAttr(node, output_arg.type_attr())) { + if (output_arg.type() != tensorflow::DT_INVALID) { + op->output_data_types.push_back(ConvertDataType(output_arg.type())); + } else if (HasAttr(node, output_arg.type_attr())) { op->output_data_types.push_back( ConvertDataType(GetDataTypeAttr(node, output_arg.type_attr()))); } else { - LOG(INFO) << "Op node missing output type attribute: " << node.name(); + LOG(WARNING) << "Op node missing output type attribute: " + << node.name(); op->output_data_types.clear(); break; } @@ -1999,6 +2015,48 @@ tensorflow::Status ConvertCTCBeamSearchDecoderOperator( return tensorflow::Status::OK(); } +// This isn't a TensorFlow builtin op. Currently this node can only be generated +// with TfLite OpHint API. +tensorflow::Status ConvertUnidirectionalSequenceLstm( + const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, + Model* model) { + DCHECK_EQ(node.op(), "UnidirectionalSequenceLstm"); + + auto* op = new UnidirectionalSequenceLstmOperator(); + const auto& indices = GetListAttr(node, "_tflite_input_indices"); + if (indices.i_size() != node.input().size()) { + return tensorflow::errors::InvalidArgument("Input size does not match."); + } + + // The input size needs to be the same as the TfLite UniDirectionalSequence + // Lstm implementation. 
+ const int kInputsSize = 20; + + op->inputs.resize(kInputsSize); + std::vector done(kInputsSize); + int idx = 0; + for (const string& input : node.input()) { + int real_index = indices.i(idx); + op->inputs[real_index] = (input); + done[real_index] = true; + idx++; + } + + for (int idx = 0; idx < done.size(); idx++) { + if (!done[idx]) { + string optional_name = node.name() + "_" + std::to_string(idx); + model->CreateOptionalArray(optional_name); + op->inputs[idx] = optional_name; + } + } + + // There're three outputs, only the last one is required. + op->outputs.push_back(node.name() + ":2"); + model->operators.emplace_back(op); + + return tensorflow::Status::OK(); +} + } // namespace namespace internal { @@ -2118,6 +2176,7 @@ ConverterMapType GetTensorFlowNodeConverterMap() { {"Transpose", ConvertSimpleOperator}, {"Unpack", ConvertUnpackOperator}, {"ZerosLike", ConvertSimpleOperator}, + {"UnidirectionalSequenceLstm", ConvertUnidirectionalSequenceLstm}, }); } diff --git a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc index 8a236d444460d45942f8644ccbec0b5e5fc18048..0767221b83cb066583dcd63a118015649d25d248 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow_test.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow_test.cc @@ -55,6 +55,13 @@ Status ImportNode(const NodeDef& node, Model* model) { converter); } +Status ImportFlexNode(const NodeDef& node, Model* model) { + // Empty converter => all nodes are flex nodes. 
+ const auto converter = internal::ConverterMapType(); + return internal::ImportTensorFlowNode(node, TensorFlowImportFlags(), model, + converter); +} + Status ImportNode(const NodeDef& node) { Model model; return ImportNode(node, &model); @@ -235,6 +242,21 @@ TEST_P(TypeImportTest, BasicTypeInference) { INSTANTIATE_TEST_CASE_P(BasicTypeInference, TypeImportTest, ::testing::ValuesIn(UnaryTestTypes())); +TEST(ImportTest, TypeInferenceWithFixedOutputType) { + // Create an op that has a fixed output type (bool). + Model model; + EXPECT_TRUE(ImportNode(BuildNode("IsFinite", {{1, 2}, {2, 3}}), &model).ok()); + ASSERT_THAT(model.operators.size(), ::testing::Ge(1)); + ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported); + const TensorFlowUnsupportedOperator* op = + static_cast( + model.operators[0].get()); + + // The static output type should be indicated in the imported op. + ASSERT_THAT(op->output_data_types, + ::testing::ElementsAre(ArrayDataType::kBool)); +} + TEST(ImportTest, FailedTypeInference) { // Create a unary op with no Type ("T") annotation. NodeDef node; @@ -284,5 +306,29 @@ TEST(ImportTest, UnsupportedOpWithWildcardOutputShapes) { ASSERT_TRUE(op->output_shapes.empty()); } +TEST(ImportTest, UnsupportedOpWithMultipleOutputs) { + NodeDef node = BuildNode("Unpack", {}); + + // Unpack's OpDef has a single output which gets multiplied based on the + // "num" attribute of the NodeDef. + AttrValue value_attr; + SetAttrValue(3, &value_attr); // 3 outputs. 
+ (*node.mutable_attr())["num"] = value_attr; + + Model model; + EXPECT_TRUE(ImportFlexNode(node, &model).ok()); + + ASSERT_THAT(model.operators.size(), ::testing::Ge(1)); + ASSERT_EQ(model.operators[0]->type, OperatorType::kUnsupported); + const TensorFlowUnsupportedOperator* op = + static_cast( + model.operators[0].get()); + + ASSERT_EQ(op->outputs.size(), 3); + ASSERT_EQ(op->outputs[0], "Node1"); + ASSERT_EQ(op->outputs[1], "Node1:1"); + ASSERT_EQ(op->outputs[2], "Node1:2"); +} + } // namespace } // namespace toco diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 61f1f095e988ed8ba96aefce3611fd8016fa149b..f3b84430dbdceba65711c4d04c24829ea36f250e 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -58,6 +58,7 @@ enum class OperatorType : uint8 { kL2Normalization, kL2Pool, kLstmCell, + kUnidirectionalSequenceLstm, kLocalResponseNormalization, kLog, kLogistic, @@ -635,6 +636,11 @@ struct LstmCellOperator : Operator { KernelType kernel_type; }; +struct UnidirectionalSequenceLstmOperator : Operator { + UnidirectionalSequenceLstmOperator() + : Operator(OperatorType::kUnidirectionalSequenceLstm) {} +}; + // Element-wise multiplication operator. // // Inputs: diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc index 3b34cd6285cbf243af38344c782978642f71ea47..c23043789c0f16429298cee6cceea2a68d3b02e1 100644 --- a/tensorflow/contrib/lite/toco/tflite/export.cc +++ b/tensorflow/contrib/lite/toco/tflite/export.cc @@ -63,6 +63,20 @@ bool IsControlFlowOp(const string& tensorflow_op) { return false; } +// Check if a TensorFlow Op is unsupported by the Flex runtime. +bool IsUnsupportedFlexOp(const string& tensorflow_op) { + if (IsControlFlowOp(tensorflow_op)) { + return true; + } + // `HashTableV2` isn't supported for now since it requires an additional + // initialization step. 
+ // TODO(b/117651199): Support `HashTableV2` with Flex runtime. + if (tensorflow_op == "HashTableV2") { + return true; + } + return false; +} + // Map from operator name to TF Lite enum value, for all builtins. const std::map& GetBuiltinOpsMap() { static std::map* builtin_ops = nullptr; @@ -150,7 +164,7 @@ OperatorKey GetOperatorKey( } if (key.is_flex_op) { - if (IsControlFlowOp(key.flex_tensorflow_op)) { + if (IsUnsupportedFlexOp(key.flex_tensorflow_op)) { key.is_unsupported_flex_op = true; } } diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc index eda1aa78a37b64fecfaa38822e922e25b1613dd0..13e3ba6d5f4c5d19c9195cfff8e10b010c92710c 100644 --- a/tensorflow/contrib/lite/toco/tflite/export_test.cc +++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc @@ -383,6 +383,21 @@ TEST(OperatorKeyTest, TestFlexWithControlFlowOp) { EXPECT_TRUE(key.is_unsupported_flex_op); } +TEST(OperatorKeyTest, TestFlexWithUnsupportedOp) { + auto op = absl::make_unique(); + op->tensorflow_op = "HashTableV2"; + + const auto ops_by_type = BuildOperatorByTypeMap(); + const auto key = details::GetOperatorKey(*op, ops_by_type, true); + + EXPECT_EQ(key.type, ::tflite::BuiltinOperator_CUSTOM); + EXPECT_EQ(key.custom_code, "FlexHashTableV2"); + EXPECT_EQ(key.version, 1); + EXPECT_TRUE(key.is_flex_op); + // `HashTableV2` should be marked as unsupported. + EXPECT_TRUE(key.is_unsupported_flex_op); +} + TEST(OperatorKeyTest, TestFlexWithPartiallySupportedOps) { // Test Toco-supported/TFLite-unsupported operators. // TODO(ycling): The test will be broken if Range is implemented in TFLite. 
diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index ed37535fe06de9611d738c40e8147b04496d39bc..e08a61d357d9c217ed6a60468fdaeb30e1baeae1 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -741,6 +741,42 @@ class Lstm : public BuiltinOperator { + public: + using BuiltinOperator::BuiltinOperator; + flatbuffers::Offset WriteOptions( + const TocoOperator& op, + flatbuffers::FlatBufferBuilder* builder) const override { + // Current toco converter only supports tanh, no clip. + return ::tflite::CreateUnidirectionalSequenceLSTMOptions( + *builder, /*fused_activation_function=*/ + ::tflite::ActivationFunctionType_TANH, + /*cell_clip=*/0.0, + /*proj_clip=*/0.0); + } + + void ReadOptions(const TfLiteOptions& options, + TocoOperator* op) const override { + // Only support tanh activation, so check that tflite type is tanh. + DCHECK(options.fused_activation_function() == + ::tflite::ActivationFunctionType_TANH); + } + + int GetVersion(const Operator& op) const override { return 1; } + + std::vector GetMutatingInputVariables( + const Operator& op) const override { + std::vector mutating_input_variables(op.inputs.size(), false); + mutating_input_variables[kInputActivationStateTensor] = true; + mutating_input_variables[kInputCellStateTensor] = true; + return mutating_input_variables; + } +}; + class Mean : public BuiltinOperator { public: @@ -1435,6 +1471,9 @@ std::vector> BuildOperatorList( OperatorType::kFakeQuant)); ops.push_back( MakeUnique(::tflite::BuiltinOperator_PACK, OperatorType::kPack)); + ops.emplace_back(MakeUnique( + ::tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM, + OperatorType::kUnidirectionalSequenceLstm)); ops.push_back(MakeUnique(::tflite::BuiltinOperator_ONE_HOT, OperatorType::kOneHot)); ops.push_back(MakeUnique(::tflite::BuiltinOperator_UNPACK, diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc 
b/tensorflow/contrib/lite/toco/tooling_util.cc index e3f27e9e2a43e0a7f6659c305beb01905c55dee7..0deaf56593b45214f27435c659426984ce3ff533 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -407,6 +407,7 @@ const char* OperatorTypeName(OperatorType type) { HANDLE_OPERATORTYPENAME_CASE(CTCBeamSearchDecoder) HANDLE_OPERATORTYPENAME_CASE(Unpack) HANDLE_OPERATORTYPENAME_CASE(ZerosLike) + HANDLE_OPERATORTYPENAME_CASE(UnidirectionalSequenceLstm) default: LOG(FATAL) << "Unhandled op type"; #undef HANDLE_OPERATORTYPENAME_CASE @@ -737,15 +738,41 @@ bool CompareArrayBuffers(const Array& lhs_array, const Array& rhs_array) { } return true; } + +bool HaveSameMinMax(const Array& lhs_array, const Array& rhs_array) { + if (lhs_array.minmax || rhs_array.minmax) { + if (!lhs_array.minmax || !rhs_array.minmax) { + return false; + } + if (!(*lhs_array.minmax == *rhs_array.minmax)) { + return false; + } + } + return true; +} + +bool HaveSameQuantizationParams(const Array& lhs_array, + const Array& rhs_array) { + if (lhs_array.quantization_params || rhs_array.quantization_params) { + if (!lhs_array.quantization_params || !rhs_array.quantization_params) { + return false; + } + if (!(*lhs_array.quantization_params == *rhs_array.quantization_params)) { + return false; + } + } + return true; +} + } // namespace bool CompareConstantArrays(const Array& lhs_array, const Array& rhs_array) { - bool attrs_equal = - lhs_array.shape() == rhs_array.shape() && - lhs_array.data_type == rhs_array.data_type && - lhs_array.final_data_type == rhs_array.final_data_type && - lhs_array.minmax == rhs_array.minmax && - lhs_array.quantization_params == rhs_array.quantization_params; + bool attrs_equal = lhs_array.shape() == rhs_array.shape() && + lhs_array.data_type == rhs_array.data_type && + lhs_array.final_data_type == rhs_array.final_data_type && + HaveSameMinMax(lhs_array, rhs_array) && + HaveSameQuantizationParams(lhs_array, rhs_array) && + 
lhs_array.narrow_range == rhs_array.narrow_range; if (!attrs_equal) { return false; } @@ -898,12 +925,12 @@ void CheckNoMissingArray(const Model& model) { void FixNoMissingArray(Model* model) { for (const auto& op : model->operators) { for (const auto& input : op->inputs) { - if (!model->HasArray(input)) { + if (!model->HasArray(input) && !model->IsOptionalArray(input)) { model->GetOrCreateArray(input); } } for (const auto& output : op->outputs) { - if (!model->HasArray(output)) { + if (!model->HasArray(output) && !model->IsOptionalArray(output)) { model->GetOrCreateArray(output); } } @@ -1237,11 +1264,15 @@ void DedupeConstantArrays(Model* model, size_t min_size) { lhs_array.final_data_type != ArrayDataType::kNone ? lhs_array.final_data_type : lhs_array.data_type; - size_t array_byte_size = - lhs_array.buffer->Length() * ElementSize(final_data_type); - if (array_byte_size < min_size) { - // Too small; skip. - continue; + // Ignore small arrays, don't check string arrays because it is not possible + // to estimate its size. + if (final_data_type != ArrayDataType::kString) { + size_t array_byte_size = + lhs_array.buffer->Length() * ElementSize(final_data_type); + if (array_byte_size < min_size) { + // Too small; skip. 
+ continue; + } } auto next_lhs_array_it = lhs_array_it; @@ -2173,6 +2204,8 @@ ArrayDataType ConvertIODataTypeToArrayDataType(IODataType type) { return ArrayDataType::kInt64; case BOOL: return ArrayDataType::kBool; + case STRING: + return ArrayDataType::kString; default: return ArrayDataType::kNone; } diff --git a/tensorflow/contrib/lite/tools/benchmark/BUILD b/tensorflow/contrib/lite/tools/benchmark/BUILD index 71bf61657ea165e43099c32d83512e7d8431a346..af544211557104ab05d161a829de22d62155a3f5 100644 --- a/tensorflow/contrib/lite/tools/benchmark/BUILD +++ b/tensorflow/contrib/lite/tools/benchmark/BUILD @@ -32,6 +32,7 @@ cc_binary( deps = [ ":benchmark_tflite_model_lib", ":logging", + "//tensorflow/contrib/lite/testing:init_tensorflow", ], ) @@ -52,6 +53,7 @@ cc_binary( ":benchmark_tflite_model_lib", ":logging", "//tensorflow/contrib/lite/delegates/flex:delegate", + "//tensorflow/contrib/lite/testing:init_tensorflow", ], ) diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc index 372d31e838e5666df492ee3156022249a2d97691..b9cf6c67d2fe941e180b0c80b58c5e8d837dd4ff 100644 --- a/tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc +++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_main.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "tensorflow/contrib/lite/testing/init_tensorflow.h" #include "tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h" #include "tensorflow/contrib/lite/tools/benchmark/logging.h" @@ -20,6 +21,7 @@ namespace tflite { namespace benchmark { int Main(int argc, char** argv) { + ::tflite::InitTensorFlow(); #ifdef TFLITE_CUSTOM_OPS_HEADER TFLITE_LOG(INFO) << "STARTING with custom ops!"; #else diff --git a/tensorflow/contrib/lookup/lookup_ops.py b/tensorflow/contrib/lookup/lookup_ops.py index f83765a48d8d3adaec84460e32c34aa68a35ab09..5abef822e82a1e9f818e54e32c2980a985d41ad8 100644 --- a/tensorflow/contrib/lookup/lookup_ops.py +++ b/tensorflow/contrib/lookup/lookup_ops.py @@ -292,8 +292,8 @@ def index_to_string(tensor, mapping, default_value="UNK", name=None): class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase): """A generic mutable hash table implementation. - Data can be inserted by calling the insert method. It does not support - initialization via the init method. + Data can be inserted by calling the insert method and removed by calling the + remove method. It does not support initialization via the init method. Example usage: @@ -391,6 +391,34 @@ class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase): with ops.colocate_with(self._table_ref): return gen_lookup_ops.lookup_table_size_v2(self._table_ref, name=name) + def remove(self, keys, name=None): + """Removes `keys` and its associated values from the table. + + If a key is not present in the table, it is silently ignored. + + Args: + keys: Keys to remove. Can be a tensor of any shape. Must match the table's + key type. + name: A name for the operation (optional). + + Returns: + The created Operation. + + Raises: + TypeError: when `keys` do not match the table data types. + """ + if keys.dtype != self._key_dtype: + raise TypeError("Signature mismatch. 
Keys must be dtype %s, got %s." % + (self._key_dtype, keys.dtype)) + + with ops.name_scope(name, "%s_lookup_table_remove" % self._name, + (self._table_ref, keys, self._default_value)) as name: + # pylint: disable=protected-access + op = gen_lookup_ops.lookup_table_remove_v2( + self._table_ref, keys, name=name) + + return op + def lookup(self, keys, name=None): """Looks up `keys` in a table, outputs the corresponding values. @@ -487,11 +515,11 @@ class MutableHashTable(LookupInterface, checkpointable.CheckpointableBase): class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): """A generic mutable hash table implementation using tensors as backing store. - Data can be inserted by calling the insert method. It does not support - initialization via the init method. + Data can be inserted by calling the insert method and removed by calling the + remove method. It does not support initialization via the init method. It uses "open addressing" with quadratic reprobing to resolve collisions. - Compared to `MutableHashTable` the insert and lookup operations in a + Compared to `MutableHashTable` the insert, remove and lookup operations in a `MutableDenseHashTable` are typically faster, but memory usage can be higher. However, `MutableDenseHashTable` does not require additional memory for temporary tensors created during checkpointing and restore operations. 
@@ -502,7 +530,9 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): table = tf.contrib.lookup.MutableDenseHashTable(key_dtype=tf.int64, value_dtype=tf.int64, default_value=-1, - empty_key=0) + empty_key=0, + deleted_key=-1) + sess.run(table.insert(keys, values)) out = table.lookup(query_keys) print(out.eval()) @@ -516,6 +546,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): value_dtype, default_value, empty_key, + deleted_key, initial_num_buckets=None, shared_name=None, name="MutableDenseHashTable", @@ -530,7 +561,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): value_dtype: the type of the value tensors. default_value: The value to use if a key is missing in the table. empty_key: the key to use to represent empty buckets internally. Must not - be used in insert or lookup operations. + be used in insert, remove or lookup operations. initial_num_buckets: the initial number of buckets. shared_name: If non-empty, this table will be shared under the given name across multiple sessions. @@ -538,9 +569,12 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): checkpoint: if True, the contents of the table are saved to and restored from checkpoints. If `shared_name` is empty for a checkpointed table, it is shared using the table node name. + deleted_key: the key to use to represent deleted buckets internally. Must + not be used in insert, remove or lookup operations and be different from + the empty_key. Returns: - A `MutableHashTable` object. + A `MutableDenseHashTable` object. Raises: ValueError: If checkpoint is True and no name was specified. 
@@ -555,6 +589,8 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): use_node_name_sharing = checkpoint and shared_name is None empty_key = ops.convert_to_tensor( empty_key, dtype=key_dtype, name="empty_key") + deleted_key = ops.convert_to_tensor( + deleted_key, dtype=key_dtype, name="deleted_key") executing_eagerly = context.executing_eagerly() if executing_eagerly and shared_name is None: # TODO(allenl): This will leak memory due to kernel caching by the @@ -564,6 +600,7 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): shared_name = "table_%d" % (ops.uid(),) self._table_ref = gen_lookup_ops.mutable_dense_hash_table_v2( empty_key=empty_key, + deleted_key=deleted_key, shared_name=shared_name, use_node_name_sharing=use_node_name_sharing, value_dtype=value_dtype, @@ -648,6 +685,34 @@ class MutableDenseHashTable(LookupInterface, checkpointable.CheckpointableBase): self._table_ref, keys, values, name=name) return op + def remove(self, keys, name=None): + """Removes `keys` and its associated values from the table. + + If a key is not present in the table, it is silently ignored. + + Args: + keys: Keys to remove. Can be a tensor of any shape. Must match the table's + key type. + name: A name for the operation (optional). + + Returns: + The created Operation. + + Raises: + TypeError: when `keys` do not match the table data types. + """ + if keys.dtype != self._key_dtype: + raise TypeError("Signature mismatch. Keys must be dtype %s, got %s." % + (self._key_dtype, keys.dtype)) + + with ops.name_scope(name, "%s_lookup_table_remove" % self._name, + (self._table_ref, keys, self._default_value)) as name: + # pylint: disable=protected-access + op = gen_lookup_ops.lookup_table_remove_v2( + self._table_ref, keys, name=name) + + return op + def export(self, name=None): """Returns tensors of all keys and values in the table. 
diff --git a/tensorflow/contrib/lookup/lookup_ops_test.py b/tensorflow/contrib/lookup/lookup_ops_test.py index 9e9345e875648f1de927e5591a6e2a8094856921..35b0d1bc4447cdd3964dbde6899deca590942c84 100644 --- a/tensorflow/contrib/lookup/lookup_ops_test.py +++ b/tensorflow/contrib/lookup/lookup_ops_test.py @@ -303,13 +303,17 @@ class MutableHashTableOpTest(test.TestCase): def testMutableHashTable(self): with self.cached_session(): default_val = -1 - keys = constant_op.constant(["brain", "salad", "surgery"]) - values = constant_op.constant([0, 1, 2], dtypes.int64) + keys = constant_op.constant(["brain", "salad", "surgery", "tarkus"]) + values = constant_op.constant([0, 1, 2, 3], dtypes.int64) table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant(["tarkus", "tank"]) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) input_string = constant_op.constant(["brain", "salad", "tank"]) @@ -472,13 +476,18 @@ class MutableHashTableOpTest(test.TestCase): def testMutableHashTableOfTensors(self): with self.cached_session(): default_val = constant_op.constant([-1, -1], dtypes.int64) - keys = constant_op.constant(["brain", "salad", "surgery"]) - values = constant_op.constant([[0, 1], [2, 3], [4, 5]], dtypes.int64) + keys = constant_op.constant(["brain", "salad", "surgery", "tarkus"]) + values = constant_op.constant([[0, 1], [2, 3], [4, 5], [6, 7]], + dtypes.int64) table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant(["tarkus", "tank"]) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) input_string = constant_op.constant(["brain", "salad", "tank"]) @@ -624,6 +633,26 
@@ class MutableHashTableOpTest(test.TestCase): result = output.eval() self.assertAllEqual([0, 1, 3, -1], result) + def testMutableHashTableRemoveHighRank(self): + with self.test_session(): + default_val = -1 + keys = constant_op.constant([["brain", "salad"], ["surgery", "tank"]]) + values = constant_op.constant([[0, 1], [2, 3]], dtypes.int64) + table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val) + + table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant(["salad", "tarkus"]) + table.remove(remove_string).run() + self.assertAllEqual(3, table.size().eval()) + + input_string = constant_op.constant(["brain", "salad", "tank", "tarkus"]) + output = table.lookup(input_string) + + result = output.eval() + self.assertAllEqual([0, -1, 3, -1], result) + def testMutableHashTableOfTensorsFindHighRank(self): with self.cached_session(): default_val = constant_op.constant([-1, -1, -1], dtypes.int64) @@ -645,6 +674,30 @@ class MutableHashTableOpTest(test.TestCase): self.assertAllEqual( [[[0, 1, 2], [2, 3, 4]], [[-1, -1, -1], [-1, -1, -1]]], result) + def testMutableHashTableOfTensorsRemoveHighRank(self): + with self.test_session(): + default_val = constant_op.constant([-1, -1, -1], dtypes.int64) + keys = constant_op.constant(["brain", "salad", "surgery"]) + values = constant_op.constant([[0, 1, 2], [2, 3, 4], [4, 5, 6]], + dtypes.int64) + table = lookup.MutableHashTable(dtypes.string, dtypes.int64, default_val) + + table.insert(keys, values).run() + self.assertAllEqual(3, table.size().eval()) + + remove_string = constant_op.constant([["brain", "tank"]]) + table.remove(remove_string).run() + self.assertAllEqual(2, table.size().eval()) + + input_string = constant_op.constant([["brain", "salad"], + ["surgery", "tank"]]) + output = table.lookup(input_string) + self.assertAllEqual([2, 2, 3], output.get_shape()) + + result = output.eval() + self.assertAllEqual( + [[[-1, -1, -1], [2, 3, 4]], [[4, 5, 6], 
[-1, -1, -1]]], result) + def testMultipleMutableHashTables(self): with self.cached_session() as sess: default_val = -1 @@ -792,13 +845,22 @@ class MutableDenseHashTableOpTest(test.TestCase): def testBasic(self): with self.cached_session(): - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([0, 1, 2], dtypes.int64) + + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([0, 1, 2, 3], dtypes.int64) table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.int64, default_value=-1, empty_key=0) + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=0, + deleted_key=-1) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant([12, 15], dtypes.int64) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) input_string = constant_op.constant([11, 12, 15], dtypes.int64) @@ -806,17 +868,26 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual([3], output.get_shape()) result = output.eval() - self.assertAllEqual([0, 1, -1], result) + self.assertAllEqual([0, -1, -1], result) def testBasicBool(self): with self.cached_session(): - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([True, True, True], dtypes.bool) + + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([True, True, True, True], dtypes.bool) table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.bool, default_value=False, empty_key=0) + dtypes.int64, + dtypes.bool, + default_value=False, + empty_key=0, + deleted_key=-1) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant([11, 15], dtypes.int64) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) 
input_string = constant_op.constant([11, 12, 15], dtypes.int64) @@ -824,14 +895,30 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual([3], output.get_shape()) result = output.eval() - self.assertAllEqual([True, True, False], result) + self.assertAllEqual([False, True, False], result) + + def testSameEmptyAndDeletedKey(self): + with self.cached_session(): + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "deleted_key"): + table = lookup.MutableDenseHashTable( + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=42, + deleted_key=42) + self.assertAllEqual(0, table.size().eval()) def testLookupUnknownShape(self): with self.cached_session(): keys = constant_op.constant([11, 12, 13], dtypes.int64) values = constant_op.constant([0, 1, 2], dtypes.int64) table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.int64, default_value=-1, empty_key=0) + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=0, + deleted_key=-1) table.insert(keys, values).run() self.assertAllEqual(3, table.size().eval()) @@ -844,45 +931,60 @@ class MutableDenseHashTableOpTest(test.TestCase): def testMapStringToFloat(self): with self.cached_session(): - keys = constant_op.constant(["a", "b", "c"], dtypes.string) - values = constant_op.constant([0.0, 1.1, 2.2], dtypes.float32) + + keys = constant_op.constant(["a", "b", "c", "d"], dtypes.string) + values = constant_op.constant([0.0, 1.1, 2.2, 3.3], dtypes.float32) default_value = constant_op.constant(-1.5, dtypes.float32) table = lookup.MutableDenseHashTable( dtypes.string, dtypes.float32, default_value=default_value, - empty_key="") + empty_key="", + deleted_key="$") self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant(["b", "e"]) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) - input_string = constant_op.constant(["a", "b", "d"], dtypes.string) 
+ input_string = constant_op.constant(["a", "b", "d", "e"], dtypes.string) output = table.lookup(input_string) - self.assertAllEqual([3], output.get_shape()) + self.assertAllEqual([4], output.get_shape()) result = output.eval() - self.assertAllClose([0, 1.1, -1.5], result) + self.assertAllClose([0, -1.5, 3.3, -1.5], result) def testMapInt64ToFloat(self): for float_dtype in [dtypes.float32, dtypes.float64]: with self.cached_session(): - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([0.0, 1.1, 2.2], float_dtype) + + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([0.0, 1.1, 2.2, 3.3], float_dtype) default_value = constant_op.constant(-1.5, float_dtype) table = lookup.MutableDenseHashTable( - dtypes.int64, float_dtype, default_value=default_value, empty_key=0) + dtypes.int64, + float_dtype, + default_value=default_value, + empty_key=0, + deleted_key=-1) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + remove_string = constant_op.constant([12, 15], dtypes.int64) + table.remove(remove_string).run() self.assertAllEqual(3, table.size().eval()) - input_string = constant_op.constant([11, 12, 15], dtypes.int64) + input_string = constant_op.constant([11, 12, 14, 15], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual([3], output.get_shape()) + self.assertAllEqual([4], output.get_shape()) result = output.eval() - self.assertAllClose([0, 1.1, -1.5], result) + self.assertAllClose([0, -1.5, 3.3, -1.5], result) def testVectorValues(self): with self.cached_session(): @@ -895,6 +997,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=default_value, empty_key=0, + deleted_key=-1, initial_num_buckets=4) self.assertAllEqual(0, table.size().eval()) @@ -908,26 +1011,35 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(4, table.size().eval()) 
self.assertAllEqual(8, len(table.export()[0].eval())) - input_string = constant_op.constant([11, 12, 15], dtypes.int64) + remove_string = constant_op.constant([12, 16], dtypes.int64) + table.remove(remove_string).run() + self.assertAllEqual(3, table.size().eval()) + self.assertAllEqual(8, len(table.export()[0].eval())) + + input_string = constant_op.constant([11, 12, 14, 15], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual( - [3, 4], output.shape, msg="Saw shape: %s" % output.shape) + self.assertAllEqual([4, 4], + output.shape, + msg="Saw shape: %s" % output.shape) result = output.eval() - self.assertAllEqual([[0, 1, 2, 3], [3, 4, 5, 6], [-1, -2, -3, -4]], - result) + self.assertAllEqual( + [[0, 1, 2, 3], [-1, -2, -3, -4], [2, 3, 4, 5], [-1, -2, -3, -4]], + result) def testVectorKeys(self): with self.cached_session(): keys = constant_op.constant([[0, 1], [1, 2], [1, 3]], dtypes.int64) values = constant_op.constant([10, 11, 12], dtypes.int64) empty_key = constant_op.constant([0, 3], dtypes.int64) + deleted_key = constant_op.constant([-1, -1], dtypes.int64) default_value = constant_op.constant(-1, dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, initial_num_buckets=8) self.assertAllEqual(0, table.size().eval()) @@ -940,13 +1052,18 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(4, table.size().eval()) self.assertAllEqual(8, len(table.export()[0].eval())) - input_string = constant_op.constant([[0, 1], [1, 2], [0, 2]], + remove_string = constant_op.constant([[1, 2], [7, 8]], dtypes.int64) + table.remove(remove_string).run() + self.assertAllEqual(3, table.size().eval()) + self.assertAllEqual(8, len(table.export()[0].eval())) + + input_string = constant_op.constant([[0, 1], [1, 2], [1, 3], [0, 2]], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual([3], output.get_shape()) + 
self.assertAllEqual([4], output.get_shape()) result = output.eval() - self.assertAllEqual([10, 11, -1], result) + self.assertAllEqual([10, -1, 12, -1], result) def testResize(self): with self.cached_session(): @@ -957,6 +1074,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=-1, empty_key=0, + deleted_key=-1, initial_num_buckets=4) self.assertAllEqual(0, table.size().eval()) @@ -964,31 +1082,42 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(3, table.size().eval()) self.assertAllEqual(4, len(table.export()[0].eval())) - keys2 = constant_op.constant([13, 14, 15, 16, 17], dtypes.int64) - values2 = constant_op.constant([3, 4, 5, 6, 7], dtypes.int64) + keys2 = constant_op.constant([12, 99], dtypes.int64) + table.remove(keys2).run() + self.assertAllEqual(2, table.size().eval()) + self.assertAllEqual(4, len(table.export()[0].eval())) + + keys3 = constant_op.constant([13, 14, 15, 16, 17], dtypes.int64) + values3 = constant_op.constant([3, 4, 5, 6, 7], dtypes.int64) - table.insert(keys2, values2).run() - self.assertAllEqual(7, table.size().eval()) + table.insert(keys3, values3).run() + self.assertAllEqual(6, table.size().eval()) self.assertAllEqual(16, len(table.export()[0].eval())) - keys3 = constant_op.constant([10, 11, 12, 13, 14, 15, 16, 17, 18], + keys4 = constant_op.constant([10, 11, 12, 13, 14, 15, 16, 17, 18], dtypes.int64) - output = table.lookup(keys3) - self.assertAllEqual([-1, 0, 1, 3, 4, 5, 6, 7, -1], output.eval()) + output = table.lookup(keys4) + self.assertAllEqual([-1, 0, -1, 3, 4, 5, 6, 7, -1], output.eval()) def testExport(self): with self.cached_session(): - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([1, 2, 3], dtypes.int64) + + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([1, 2, 3, 4], dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=-1, empty_key=100, + 
deleted_key=200, initial_num_buckets=8) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + + keys2 = constant_op.constant([12, 15], dtypes.int64) + table.remove(keys2).run() self.assertAllEqual(3, table.size().eval()) exported_keys, exported_values = table.export() @@ -1005,8 +1134,8 @@ class MutableDenseHashTableOpTest(test.TestCase): pairs = np.dstack((np_keys.flatten(), np_values.flatten()))[0] # sort by key pairs = pairs[pairs[:, 0].argsort()] - self.assertAllEqual([[11, 1], [12, 2], [13, 3], [100, 0], [100, 0], - [100, 0], [100, 0], [100, 0]], pairs) + self.assertAllEqual([[11, 1], [13, 3], [14, 4], [100, 0], [100, 0], + [100, 0], [100, 0], [200, 2]], pairs) def testSaveRestore(self): save_dir = os.path.join(self.get_temp_dir(), "save_restore") @@ -1015,13 +1144,15 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: default_value = -1 empty_key = 0 - keys = constant_op.constant([11, 12, 13], dtypes.int64) - values = constant_op.constant([0, 1, 2], dtypes.int64) + deleted_key = -1 + keys = constant_op.constant([11, 12, 13, 14], dtypes.int64) + values = constant_op.constant([0, 1, 2, 3], dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=32) @@ -1030,6 +1161,11 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + self.assertAllEqual(32, len(table.export()[0].eval())) + + keys2 = constant_op.constant([12, 15], dtypes.int64) + table.remove(keys2).run() self.assertAllEqual(3, table.size().eval()) self.assertAllEqual(32, len(table.export()[0].eval())) @@ -1043,6 +1179,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=default_value, 
empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=64) @@ -1062,7 +1199,7 @@ class MutableDenseHashTableOpTest(test.TestCase): input_string = constant_op.constant([10, 11, 12, 13, 14], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual([-1, 0, 1, 2, -1], output.eval()) + self.assertAllEqual([-1, 0, -1, 2, 3], output.eval()) @test_util.run_in_graph_and_eager_modes def testObjectSaveRestore(self): @@ -1071,6 +1208,7 @@ class MutableDenseHashTableOpTest(test.TestCase): default_value = -1 empty_key = 0 + deleted_key = -1 keys = constant_op.constant([11, 12, 13], dtypes.int64) values = constant_op.constant([0, 1, 2], dtypes.int64) save_table = lookup.MutableDenseHashTable( @@ -1078,6 +1216,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=32) @@ -1097,6 +1236,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=64) @@ -1124,14 +1264,18 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: empty_key = constant_op.constant([11, 13], dtypes.int64) + deleted_key = constant_op.constant([-2, -3], dtypes.int64) default_value = constant_op.constant([-1, -2], dtypes.int64) - keys = constant_op.constant([[11, 12], [11, 14], [13, 14]], dtypes.int64) - values = constant_op.constant([[0, 1], [2, 3], [4, 5]], dtypes.int64) + keys = constant_op.constant([[11, 12], [11, 14], [12, 13], [13, 14]], + dtypes.int64) + values = constant_op.constant([[0, 1], [2, 3], [2, 4], [4, 5]], + dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=32) @@ -1140,6 
+1284,11 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + self.assertAllEqual(32, len(table.export()[0].eval())) + + keys2 = constant_op.constant([[12, 13], [16, 17]], dtypes.int64) + table.remove(keys2).run() self.assertAllEqual(3, table.size().eval()) self.assertAllEqual(32, len(table.export()[0].eval())) @@ -1149,12 +1298,14 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: empty_key = constant_op.constant([11, 13], dtypes.int64) + deleted_key = constant_op.constant([-2, -3], dtypes.int64) default_value = constant_op.constant([-1, -2], dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t1", checkpoint=True, initial_num_buckets=64) @@ -1184,14 +1335,17 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: empty_key = constant_op.constant([11, 13], dtypes.int64) + deleted_key = constant_op.constant([-1, -1], dtypes.int64) default_value = constant_op.constant(-1, dtypes.int64) - keys = constant_op.constant([[11, 12], [11, 14], [13, 14]], dtypes.int64) - values = constant_op.constant([0, 1, 2], dtypes.int64) + keys = constant_op.constant([[11, 12], [11, 14], [12, 13], [13, 14]], + dtypes.int64) + values = constant_op.constant([0, 1, 2, 3], dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t2", checkpoint=True, initial_num_buckets=32) @@ -1200,6 +1354,11 @@ class MutableDenseHashTableOpTest(test.TestCase): self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() + self.assertAllEqual(4, table.size().eval()) + self.assertAllEqual(32, len(table.export()[0].eval())) + + keys2 = constant_op.constant([[12, 13], 
[15, 16]], dtypes.int64) + table.remove(keys2).run() self.assertAllEqual(3, table.size().eval()) self.assertAllEqual(32, len(table.export()[0].eval())) @@ -1209,12 +1368,14 @@ class MutableDenseHashTableOpTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: empty_key = constant_op.constant([11, 13], dtypes.int64) + deleted_key = constant_op.constant([-1, -1], dtypes.int64) default_value = constant_op.constant(-1, dtypes.int64) table = lookup.MutableDenseHashTable( dtypes.int64, dtypes.int64, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name="t2", checkpoint=True, initial_num_buckets=64) @@ -1235,7 +1396,7 @@ class MutableDenseHashTableOpTest(test.TestCase): input_string = constant_op.constant( [[11, 12], [11, 14], [11, 15], [13, 14], [13, 15]], dtypes.int64) output = table.lookup(input_string) - self.assertAllEqual([0, 1, -1, 2, -1], output.eval()) + self.assertAllEqual([0, 1, -1, 3, -1], output.eval()) def testReprobe(self): with self.cached_session(): @@ -1248,6 +1409,7 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=-1, empty_key=0, + deleted_key=-1, initial_num_buckets=8) self.assertAllEqual(0, table.size().eval()) @@ -1267,7 +1429,11 @@ class MutableDenseHashTableOpTest(test.TestCase): keys = constant_op.constant([11, 0, 13], dtypes.int64) values = constant_op.constant([0, 1, 2], dtypes.int64) table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.int64, default_value=-1, empty_key=12) + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=12, + deleted_key=-1) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() @@ -1283,19 +1449,35 @@ class MutableDenseHashTableOpTest(test.TestCase): def testErrors(self): with self.cached_session(): table = lookup.MutableDenseHashTable( - dtypes.int64, dtypes.int64, default_value=-1, empty_key=0) + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=0, + deleted_key=-1) # Inserting the empty key 
returns an error - keys = constant_op.constant([11, 0], dtypes.int64) - values = constant_op.constant([0, 1], dtypes.int64) + keys1 = constant_op.constant([11, 0], dtypes.int64) + values1 = constant_op.constant([0, 1], dtypes.int64) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "empty_key"): - table.insert(keys, values).run() + table.insert(keys1, values1).run() # Looking up the empty key returns an error with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "empty_key"): - table.lookup(keys).eval() + table.lookup(keys1).eval() + + # Inserting the deleted key returns an error + keys2 = constant_op.constant([11, -1], dtypes.int64) + values2 = constant_op.constant([0, 1], dtypes.int64) + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "deleted_key"): + table.insert(keys2, values2).run() + + # Looking up the deleted key returns an error + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "deleted_key"): + table.lookup(keys2).eval() # Arbitrary tensors of keys are not supported keys = constant_op.constant([[11, 0], [12, 1]], dtypes.int64) @@ -1312,11 +1494,43 @@ class MutableDenseHashTableOpTest(test.TestCase): dtypes.int64, default_value=-1, empty_key=17, + deleted_key=-1, initial_num_buckets=12) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, "Number of buckets must be"): self.assertAllEqual(0, table2.size().eval()) + with self.assertRaisesRegexp( + errors_impl.InvalidArgumentError, + "Empty and deleted keys must have same shape"): + table3 = lookup.MutableDenseHashTable( + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=42, + deleted_key=[1, 2]) + self.assertAllEqual(0, table3.size().eval()) + + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "Empty and deleted keys cannot be equal"): + table4 = lookup.MutableDenseHashTable( + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=42, + deleted_key=42) + self.assertAllEqual(0, table4.size().eval()) + + with
self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "Empty and deleted keys cannot be equal"): + table5 = lookup.MutableDenseHashTable( + dtypes.int64, + dtypes.int64, + default_value=-1, + empty_key=[1, 2, 3], + deleted_key=[1, 2, 3]) + self.assertAllEqual(0, table5.size().eval()) + class IndexTableFromFile(test.TestCase): @@ -2558,7 +2772,11 @@ class MutableDenseHashTableBenchmark(MutableHashTableBenchmark): def _create_table(self): return lookup.MutableDenseHashTable( - dtypes.int64, dtypes.float32, default_value=0.0, empty_key=-1) + dtypes.int64, + dtypes.float32, + default_value=0.0, + empty_key=-1, + deleted_key=-2) if __name__ == "__main__": diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py index 651de4e2f446b2da39b000cde2541872116cbdba..619294b51822bd9983eda777acae5cf0d253926d 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops.py +++ b/tensorflow/contrib/losses/python/losses/loss_ops.py @@ -22,6 +22,7 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.framework.python.ops import add_arg_scope +from tensorflow.python.compat import compat from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -82,6 +83,8 @@ def _safe_div(numerator, denominator, name="value"): Returns: The element-wise value of the numerator divided by the denominator. """ + if compat.forward_compatible(2018, 11, 1): + return math_ops.div_no_nan(numerator, denominator, name=name) return array_ops.where( math_ops.greater(denominator, 0), math_ops.div(numerator, @@ -104,7 +107,7 @@ def _safe_mean(losses, num_present): then zero is returned. 
""" total_loss = math_ops.reduce_sum(losses) - return _safe_div(total_loss, num_present) + return _safe_div(total_loss, num_present, name="value") @deprecated("2016-12-30", "Use tf.losses.compute_weighted_loss instead.") @@ -609,11 +612,14 @@ def mean_pairwise_squared_error(predictions, math_ops.square(diffs), reduction_indices=reduction_indices) num_present_per_batch = _num_present(diffs, weights, per_batch=True) - term1 = 2.0 * _safe_div(sum_squares_diff_per_batch, num_present_per_batch) + term1 = 2.0 * _safe_div(sum_squares_diff_per_batch, + num_present_per_batch, + name="value") sum_diff = math_ops.reduce_sum(diffs, reduction_indices=reduction_indices) - term2 = 2.0 * _safe_div( - math_ops.square(sum_diff), math_ops.square(num_present_per_batch)) + term2 = 2.0 * _safe_div(math_ops.square(sum_diff), + math_ops.square(num_present_per_batch), + name="value") loss = _scale_losses(term1 - term2, weights) diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index bbf5d3f30c9f7fd0cbe2ad78da15ff3eb34ae2c5..d6932f6e4b603b1a76250ab622f5fe8eaea81bc9 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -24,6 +24,7 @@ from __future__ import print_function import collections as collections_lib +from tensorflow.python.compat import compat from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -45,22 +46,30 @@ from tensorflow.python.util.deprecation import deprecated _EPSILON = 1e-7 -def _safe_div(numerator, denominator, name): - """Divides two values, returning 0 if the denominator is <= 0. +def _safe_div(numerator, denominator): + """Computes a safe divide which returns 0 if the denominator is zero. 
+ + Note that the function contains an additional conditional check that is + necessary for avoiding situations where the loss is zero causing NaNs to + creep into the gradient computation. Args: - numerator: A real `Tensor`. - denominator: A real `Tensor`, with dtype matching `numerator`. - name: Name for the returned op. + numerator: An arbitrary `Tensor`. + denominator: A `Tensor` whose shape matches `numerator` and whose values are + assumed to be non-negative. Returns: - 0 if `denominator` <= 0, else `numerator` / `denominator` + The element-wise value of the numerator divided by the denominator. """ + if compat.forward_compatible(2018, 11, 1): + return math_ops.div_no_nan(numerator, denominator) return array_ops.where( math_ops.greater(denominator, 0), - math_ops.truediv(numerator, denominator), - 0, - name=name) + math_ops.div(numerator, + array_ops.where( + math_ops.equal(denominator, 0), + array_ops.ones_like(denominator), denominator)), + array_ops.zeros_like(numerator)) @deprecated(None, 'Please switch to tf.metrics.true_positives. 
Note that the ' @@ -3239,11 +3248,11 @@ def streaming_covariance(predictions, # We update the means by Delta=Error*BatchCount/(BatchCount+PrevCount) # batch_mean_prediction is E[x_B] in the update equation batch_mean_prediction = _safe_div( - math_ops.reduce_sum(weighted_predictions), batch_count, - 'batch_mean_prediction') + math_ops.reduce_sum(weighted_predictions), + batch_count) delta_mean_prediction = _safe_div( - (batch_mean_prediction - mean_prediction) * batch_count, update_count, - 'delta_mean_prediction') + (batch_mean_prediction - mean_prediction) * batch_count, + update_count) update_mean_prediction = state_ops.assign_add(mean_prediction, delta_mean_prediction) # prev_mean_prediction is E[x_A] in the update equation @@ -3251,9 +3260,11 @@ def streaming_covariance(predictions, # batch_mean_label is E[y_B] in the update equation batch_mean_label = _safe_div( - math_ops.reduce_sum(weighted_labels), batch_count, 'batch_mean_label') - delta_mean_label = _safe_div((batch_mean_label - mean_label) * batch_count, - update_count, 'delta_mean_label') + math_ops.reduce_sum(weighted_labels), + batch_count) + delta_mean_label = _safe_div( + (batch_mean_label - mean_label) * batch_count, + update_count) update_mean_label = state_ops.assign_add(mean_label, delta_mean_label) # prev_mean_label is E[y_A] in the update equation prev_mean_label = update_mean_label - delta_mean_label @@ -3915,8 +3926,9 @@ def cohen_kappa(labels, po_sum = math_ops.reduce_sum(po) total = math_ops.reduce_sum(pe_row) pe_sum = math_ops.reduce_sum( - metrics_impl._safe_div( # pylint: disable=protected-access - pe_row * pe_col, total, None)) + _safe_div( + math_ops.to_double(pe_row * pe_col), + math_ops.to_double(total))) po_sum, pe_sum, total = (math_ops.to_double(po_sum), math_ops.to_double(pe_sum), math_ops.to_double(total)) diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py index 
67e58ff15d83888d55364c02d4e04483ab86d096..d2b811641764df05c66654dfcb044fa7e78853a5 100644 --- a/tensorflow/contrib/model_pruning/python/pruning.py +++ b/tensorflow/contrib/model_pruning/python/pruning.py @@ -210,7 +210,7 @@ def get_pruning_hparams(): block_height=1, block_width=1, block_pooling_function='AVG', - initial_sparsity=0, + initial_sparsity=0.0, target_sparsity=0.5, sparsity_function_begin_step=0, sparsity_function_end_step=100, diff --git a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py index 423a8689aeee062fb58eaf9d6d9b980b0998754e..a39efedaf0406e8eb405d6ca179bff6a9d1ba9c0 100644 --- a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py +++ b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function from functools import partial +import os import numpy as np from tensorflow.contrib import nccl @@ -60,6 +61,7 @@ class NcclTestCase(test.TestCase): def _Test(self, nccl_reduce, numpy_fn, + dtypes=[np.float16, np.float32, np.int32, np.int64, np.float64], device_sets=(['/device:GPU:1', '/device:GPU:2', '/device:GPU:0'], ['/device:GPU:1', '/device:GPU:0'])): """Tests that nccl_reduce does the same as reduction with numpy_fn. @@ -72,7 +74,10 @@ class NcclTestCase(test.TestCase): two. device_sets: Tuple of virtual devices to run test on. """ - for dtype in [np.float16, np.float32, np.int32, np.int64, np.float64]: + # Enable NCCL printouts. + os.environ["NCCL_DEBUG"] = "INFO" + + for dtype in dtypes: # Create session inside outer loop to test use of # same communicator across multiple sessions. with self.test_session(use_gpu=True) as sess: @@ -124,7 +129,8 @@ class NcclTestCase(test.TestCase): reduce_tensors, inputs, losses, colocate_gradients_with_ops=True) return [g for g in grads if g is not None] - self._Test(_Gradient, numpy_fn) + # int types are considered not 'trainable' and no gradients are generated. 
+ self._Test(_Gradient, numpy_fn, dtypes=[np.float16, np.float32, np.float64]) class AllReduceTest(NcclTestCase): diff --git a/tensorflow/contrib/opt/python/training/adamax_test.py b/tensorflow/contrib/opt/python/training/adamax_test.py index 61d8b94eca27427754cb2806f33d95e5643c660f..a1e220924f3a7c37aa9c9f3c3c1cc479b9a95bc0 100644 --- a/tensorflow/contrib/opt/python/training/adamax_test.py +++ b/tensorflow/contrib/opt/python/training/adamax_test.py @@ -129,7 +129,7 @@ class AdaMaxOptimizerTest(test.TestCase): def testSparseDevicePlacement(self): for index_dtype in [dtypes.int32, dtypes.int64]: - with self.test_session(force_gpu=test.is_gpu_available()): + with self.cached_session(force_gpu=test.is_gpu_available()): # If a GPU is available, tests that all optimizer ops can be placed on # it (i.e. they have GPU kernels). var = variables.Variable([[1.0], [2.0]]) diff --git a/tensorflow/contrib/opt/python/training/addsign_test.py b/tensorflow/contrib/opt/python/training/addsign_test.py index 6150fa117fa17f1d5bb668a71f1abcd78d1f89b8..2c74acd9fff805aeaeec64e75c38eaa60ede66fd 100644 --- a/tensorflow/contrib/opt/python/training/addsign_test.py +++ b/tensorflow/contrib/opt/python/training/addsign_test.py @@ -66,7 +66,7 @@ class AddSignTest(test.TestCase): alpha=1.0, beta=0.9): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(use_gpu=True): + with self.cached_session(use_gpu=True): # Initialize variables for numpy implementation. m0, m1 = 0.0, 0.0 var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) @@ -169,7 +169,7 @@ class AddSignTest(test.TestCase): alpha=1.0, beta=0.9): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(use_gpu=True): + with self.cached_session(use_gpu=True): # Initialize variables for numpy implementation. 
m0, m1 = 0.0, 0.0 var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) diff --git a/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py b/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py index 089ecf597dfb1890137883d1c05d3c40db3112b5..65ad724b3c3b7f224cab3af398f8e54e0c1721da 100644 --- a/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/lazy_adam_optimizer_test.py @@ -107,7 +107,7 @@ class AdamOptimizerTest(test.TestCase, parameterized.TestCase): @parameterized.parameters([False, True]) def testSparseDevicePlacement(self, use_resource): for index_dtype in [dtypes.int32, dtypes.int64]: - with self.test_session(force_gpu=test.is_gpu_available()): + with self.cached_session(force_gpu=test.is_gpu_available()): # If a GPU is available, tests that all optimizer ops can be placed on # it (i.e. they have GPU kernels). if use_resource: diff --git a/tensorflow/contrib/opt/python/training/powersign_test.py b/tensorflow/contrib/opt/python/training/powersign_test.py index 1cf9901dc07637675bf49abb764b4bc3cf0224ae..f2c87b588390e6f22700caf54c88044e8556e8e2 100644 --- a/tensorflow/contrib/opt/python/training/powersign_test.py +++ b/tensorflow/contrib/opt/python/training/powersign_test.py @@ -67,7 +67,7 @@ class PowerSignTest(test.TestCase): base=math.e, beta=0.9): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(use_gpu=True): + with self.cached_session(use_gpu=True): # Initialize variables for numpy implementation. m0, m1 = 0.0, 0.0 var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) @@ -173,7 +173,7 @@ class PowerSignTest(test.TestCase): py_sign_decay_fn=None, base=math.e, beta=0.9): - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: # Initialize variables for numpy implementation. 
m0, m1 = 0.0, 0.0 diff --git a/tensorflow/contrib/optimizer_v2/adam_test.py b/tensorflow/contrib/optimizer_v2/adam_test.py index b1ad0ade427df2abd209381a7020374850e19fa5..b55739f788ed81723a84e5534c1da4e281333482 100644 --- a/tensorflow/contrib/optimizer_v2/adam_test.py +++ b/tensorflow/contrib/optimizer_v2/adam_test.py @@ -109,7 +109,7 @@ class AdamOptimizerTest(test.TestCase): def testSparseDevicePlacement(self): for index_dtype in [dtypes.int32, dtypes.int64]: - with self.test_session(force_gpu=test.is_gpu_available()): + with self.cached_session(force_gpu=test.is_gpu_available()): # If a GPU is available, tests that all optimizer ops can be placed on # it (i.e. they have GPU kernels). var = variables.Variable([[1.0], [2.0]]) diff --git a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py index 3c68ef995a4244d97c2270ce71a31f5a67c887bf..6362d424ed0b417a67326630c16d67c3cb319dc6 100644 --- a/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py +++ b/tensorflow/contrib/optimizer_v2/checkpointable_utils_test.py @@ -143,10 +143,12 @@ class CheckpointingTests(test.TestCase): suffix = "/.ATTRIBUTES/VARIABLE_VALUE" expected_checkpoint_names = [ name + suffix for name in expected_checkpoint_names] - # The Dense layers also save get_config() JSON - expected_checkpoint_names.extend( - ["model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON", - "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON"]) + # The optimizer and Dense layers also save get_config() JSON + expected_checkpoint_names.extend([ + "optimizer/.ATTRIBUTES/OBJECT_CONFIG_JSON", + "model/_second/.ATTRIBUTES/OBJECT_CONFIG_JSON", + "model/_named_dense/.ATTRIBUTES/OBJECT_CONFIG_JSON" + ]) named_variables = {v.name: v for v in named_variables} six.assertCountEqual(self, expected_checkpoint_names, named_variables.keys()) @@ -335,7 +337,7 @@ class CheckpointingTests(test.TestCase): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = 
os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): - with ops.Graph().as_default(), self.test_session( + with ops.Graph().as_default(), self.session( graph=ops.get_default_graph()), test_util.device(use_gpu=True): model = MyModel() optimizer = adam.AdamOptimizer(0.001) @@ -368,7 +370,7 @@ class CheckpointingTests(test.TestCase): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): - with ops.Graph().as_default(), self.test_session( + with ops.Graph().as_default(), self.session( graph=ops.get_default_graph()), test_util.device(use_gpu=True): model = MyModel() # Don't actually train so we can test variable values @@ -686,7 +688,7 @@ class CheckpointCompatibilityTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") with context.graph_mode(): save_graph = ops.Graph() - with save_graph.as_default(), self.test_session( + with save_graph.as_default(), self.session( graph=save_graph) as session: root = self._initialized_model() name_saver = core_saver.Saver() @@ -731,7 +733,7 @@ class CheckpointCompatibilityTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") with context.graph_mode(): save_graph = ops.Graph() - with save_graph.as_default(), self.test_session( + with save_graph.as_default(), self.session( graph=save_graph) as session: root = self._initialized_model() save_path = root.save( @@ -750,7 +752,7 @@ class CheckpointCompatibilityTests(test.TestCase): save_path = root.save(file_prefix=checkpoint_prefix) with context.graph_mode(): save_graph = ops.Graph() - with save_graph.as_default(), self.test_session( + with save_graph.as_default(), self.session( graph=save_graph): root = self._initialized_model() self._set_sentinels(root) diff --git a/tensorflow/contrib/optimizer_v2/rmsprop_test.py b/tensorflow/contrib/optimizer_v2/rmsprop_test.py index 
44301ffe9e5cc9a4ead6462887ec669811f2cc38..202c1e9afc0623a5837aa82480f1b406834007ee 100644 --- a/tensorflow/contrib/optimizer_v2/rmsprop_test.py +++ b/tensorflow/contrib/optimizer_v2/rmsprop_test.py @@ -89,7 +89,7 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase): def testDense(self, dtype, param_value): (learning_rate, decay, momentum, epsilon, centered, use_resource) = tuple( param_value) - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): # Initialize variables for numpy implementation. var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) grads0_np = np.array([0.1, 0.2], dtype=dtype.as_numpy_dtype) @@ -157,8 +157,11 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase): self.assertAllCloseAccordingToType(rms1_np, rms1.eval()) self.assertAllCloseAccordingToType(mom0_np, mom0.eval()) self.assertAllCloseAccordingToType(mom1_np, mom1.eval()) - self.assertAllCloseAccordingToType(var0_np, var0.eval()) - self.assertAllCloseAccordingToType(var1_np, var1.eval()) + # TODO(b/117393988): Reduce tolerances for float16. + self.assertAllCloseAccordingToType( + var0_np, var0.eval(), half_rtol=3e-3, half_atol=3e-3) + self.assertAllCloseAccordingToType( + var1_np, var1.eval(), half_rtol=3e-3, half_atol=3e-3) @parameterized.parameters([dtypes.float32, dtypes.float64]) def testMinimizeSparseResourceVariable(self, dtype): @@ -210,7 +213,7 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase): def testSparse(self, dtype, param_value): (learning_rate, decay, momentum, epsilon, centered, _) = tuple( param_value) - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): # Initialize variables for numpy implementation. 
var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) grads0_np = np.array([0.1], dtype=dtype.as_numpy_dtype) @@ -284,7 +287,7 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase): @parameterized.parameters(_DATA_TYPES) def testWithoutMomentum(self, dtype): - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) @@ -350,7 +353,7 @@ class RMSPropOptimizerTest(test.TestCase, parameterized.TestCase): @parameterized.parameters(_DATA_TYPES) def testWithMomentum(self, dtype): - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): var0 = variables.Variable([1.0, 2.0], dtype=dtype) var1 = variables.Variable([3.0, 4.0], dtype=dtype) grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index 7575b1b6cd6b0a9315c1b1e5e50293dc54a953b6..e0c6da00d86fe4c5f881bcab7b444182da092b8f 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -145,7 +145,7 @@ def _FindFusedBatchNorms(graph): Args: graph: Graph to inspect. - Yields: + Returns: _FusedBatchNormMatches. """ input_pattern = graph_matcher.OpTypePattern('*') @@ -169,8 +169,15 @@ def _FindFusedBatchNorms(graph): graph_matcher.OpTypePattern('*'), graph_matcher.OpTypePattern('*') ]) + # Identity between conv/matmul and bn + layer_pattern_with_identity = graph_matcher.OpTypePattern( + 'Identity', + inputs=[ + graph_matcher.OneofPattern([batch_to_space_pattern, layer_pattern]) + ]) layer_output_pattern = graph_matcher.OneofPattern( - [layer_pattern, batch_to_space_pattern]) + [layer_pattern_with_identity, layer_pattern, batch_to_space_pattern]) + # MatMul has a Reshape between it and FusedBatchNorm. 
matmul_reshape_pattern = graph_matcher.OpTypePattern( 'Reshape', @@ -188,6 +195,11 @@ def _FindFusedBatchNorms(graph): 'Reshape', inputs=[batch_norm_pattern, graph_matcher.OpTypePattern('*')]) + batch_norm_identity_pattern = graph_matcher.OpTypePattern( + 'Identity', inputs=[batch_norm_pattern, matmul_bn_output_reshape_pattern]) + + bn_identity_matcher = graph_matcher.GraphMatcher(batch_norm_identity_pattern) + bn_matcher = graph_matcher.GraphMatcher( graph_matcher.OneofPattern( [matmul_bn_output_reshape_pattern, batch_norm_pattern])) @@ -200,7 +212,17 @@ def _FindFusedBatchNorms(graph): moving_avg_mul_matcher = graph_matcher.GraphMatcher( moving_average_mul_pattern) - for match_result in bn_matcher.match_graph(graph): + def _GetLayerMatch(match_result): + """Populates a layer match object containing ops/tensors for folding BNs. + + Args: + match_result: Matched result from graph matcher + + Returns: + layer_op: Matching conv/fc op prior to batch norm + BatchNormMatch: _BatchNormMatch containing all required batch norm + parameters. + """ moving_mean_tensor = None moving_variance_tensor = None bn_decay_mean_tensor = None @@ -208,7 +230,11 @@ def _FindFusedBatchNorms(graph): batch_to_space_op = None layer_op = match_result.get_op(layer_pattern) layer_tensor = match_result.get_tensor(layer_pattern) + bn_id_op = match_result.get_op(batch_norm_identity_pattern) bn_op = match_result.get_op(batch_norm_pattern) + if bn_id_op is None: + bn_id_op = bn_op + batch_epsilon = bn_op.get_attr('epsilon') # In the MatMul case, the output of batch norm is reshaped back into a @@ -219,13 +245,13 @@ def _FindFusedBatchNorms(graph): # If the matcher didn't match matmul_bn_output_reshape, there will be # another match for this 'MatMul' later, so we can skip this one. if output_reshape_op is None: - continue + return None, None output_tensor = output_reshape_op.outputs[0] # Ensure that the output tensor has consumers, otherwise this is a dangling # node and not a match. 
if not output_tensor.consumers(): - continue + return None, None batch_to_space_op = match_result.get_op(batch_to_space_pattern) input_tensor = match_result.get_tensor(input_pattern) @@ -277,7 +303,7 @@ def _FindFusedBatchNorms(graph): mean_tensor = match_result.get_tensor(mean_pattern) variance_tensor = match_result.get_tensor(variance_pattern) - yield _BatchNormMatch( + return layer_op, _BatchNormMatch( layer_op=layer_op, bn_op=bn_op, output_tensor=output_tensor, @@ -294,6 +320,26 @@ def _FindFusedBatchNorms(graph): batch_epsilon=batch_epsilon, batch_to_space_op=batch_to_space_op) + layer_matches = [] + # We use matched_layer_set to ensure that layers aren't matched multiple + # times. + matched_layer_set = set() + for match_result in bn_identity_matcher.match_graph(graph): + layer_op, layer_match = _GetLayerMatch(match_result) + if layer_op is not None: + if layer_op not in matched_layer_set: + matched_layer_set.add(layer_op) + layer_matches.append(layer_match) + + for match_result in bn_matcher.match_graph(graph): + layer_op, layer_match = _GetLayerMatch(match_result) + if layer_op is not None: + if layer_op not in matched_layer_set: + matched_layer_set.add(layer_op) + layer_matches.append(layer_match) + + return layer_matches + def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay): """Computes batch norm correction params. 
diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py index 3f8063cc022726cb745d42aba3c834c71e876e70..77b3f62e9d62085cad87599c53f8a914ccba3f43 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py @@ -48,26 +48,32 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): def _RunTestOverParameters(self, test_fn): parameters_list = [ # (relu, relu_op_name, with_bypass, has_scaling, fused_batch_norm, - # freeze_batch_norm_delay) - (nn_ops.relu6, 'Relu6', False, False, False, 100), - (nn_ops.relu, 'Relu', False, False, False, None), - (nn_ops.relu6, 'Relu6', True, False, False, 100), - (nn_ops.relu, 'Relu', True, False, False, None), - (nn_ops.relu6, 'Relu6', False, True, False, 100), - (nn_ops.relu, 'Relu', False, True, False, None), - (nn_ops.relu6, 'Relu6', True, True, False, 100), - (nn_ops.relu, 'Relu', True, True, False, None), + # freeze_batch_norm_delay, insert identity node) + (nn_ops.relu6, 'Relu6', False, False, False, 100, False), + (nn_ops.relu, 'Relu', False, False, False, None, False), + (nn_ops.relu6, 'Relu6', True, False, False, 100, False), + (nn_ops.relu, 'Relu', True, False, False, None, False), + (nn_ops.relu6, 'Relu6', False, True, False, 100, False), + (nn_ops.relu, 'Relu', False, True, False, None, False), + (nn_ops.relu6, 'Relu6', True, True, False, 100, False), + (nn_ops.relu, 'Relu', True, True, False, None, False), # Fused batch norm always has scaling enabled. 
- (nn_ops.relu6, 'Relu6', False, True, True, None), - (nn_ops.relu, 'Relu', False, True, True, 100), - (nn_ops.relu6, 'Relu6', True, True, True, None), - (nn_ops.relu, 'Relu', True, True, True, 100), + (nn_ops.relu6, 'Relu6', False, True, True, None, False), + (nn_ops.relu, 'Relu', False, True, True, 100, False), + (nn_ops.relu6, 'Relu6', True, True, True, None, False), + (nn_ops.relu, 'Relu', True, True, True, 100, False), + (nn_ops.relu6, 'Relu6', False, True, True, None, True), + (nn_ops.relu, 'Relu', False, True, True, 100, True), + (nn_ops.relu6, 'Relu6', True, True, True, None, True), + (nn_ops.relu, 'Relu', True, True, True, 100, True), ] for params in parameters_list: - test_fn(params[0], params[1], params[2], params[3], params[4], params[5]) + test_fn(params[0], params[1], params[2], params[3], params[4], params[5], + params[6]) def _TestFoldConv2d(self, relu, relu_op_name, with_bypass, has_scaling, - fused_batch_norm, freeze_batch_norm_delay): + fused_batch_norm, freeze_batch_norm_delay, + insert_identity_node): """Tests folding cases: inputs -> Conv2d with batch norm -> Relu*. Args: @@ -79,6 +85,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fused_batch_norm: Bool, when true the batch norm is fused. 
freeze_batch_norm_delay: None or the number of steps after which training switches to using frozen mean and variance + insert_identity_node: Bool, insert identity node between conv and batch + norm """ g = ops.Graph() with g.as_default(): @@ -87,18 +95,42 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): out_depth = 3 if with_bypass else 32 stride = 1 if with_bypass else 2 activation_fn = None if with_bypass else relu - scope = 'test/test2' if with_bypass else 'test' - node = conv2d( - inputs, - out_depth, [5, 5], - stride=stride, - padding='SAME', - weights_initializer=self._WeightInit(0.09), - activation_fn=activation_fn, - normalizer_fn=batch_norm, - normalizer_params=self._BatchNormParams( - scale=has_scaling, fused=fused_batch_norm), - scope=scope) + name = 'test/test2' if with_bypass else 'test' + if insert_identity_node: + with g.name_scope(name): + node = conv2d( + inputs, + out_depth, [5, 5], + stride=stride, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=None, + normalizer_fn=None, + biases_initializer=None) + conv_out = array_ops.identity(node, name='conv_out') + + node = batch_norm( + conv_out, + center=True, + scale=has_scaling, + decay=1.0 - 0.003, + fused=fused_batch_norm) + if activation_fn is not None: + node = activation_fn(node) + conv_name = name + '/Conv' + else: + node = conv2d( + inputs, + out_depth, [5, 5], + stride=stride, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=activation_fn, + normalizer_fn=batch_norm, + normalizer_params=self._BatchNormParams( + scale=has_scaling, fused=fused_batch_norm), + scope=name) + conv_name = name if with_bypass: node = math_ops.add(inputs, node, name='test/Add') relu(node, name='test/' + relu_op_name) @@ -106,31 +138,30 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fold_batch_norms.FoldBatchNorms( g, is_training=True, freeze_batch_norm_delay=freeze_batch_norm_delay) - folded_mul = g.get_operation_by_name(scope + 
'/mul_fold') + folded_mul = g.get_operation_by_name(conv_name + '/mul_fold') self.assertEqual(folded_mul.type, 'Mul') self._AssertInputOpsAre(folded_mul, [ - scope + '/correction_mult', - self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm) + conv_name + '/correction_mult', + self._BatchNormMultiplierName(conv_name, has_scaling, fused_batch_norm) ]) - self._AssertOutputGoesToOps(folded_mul, g, [scope + '/Conv2D_Fold']) + self._AssertOutputGoesToOps(folded_mul, g, [conv_name + '/Conv2D_Fold']) - folded_conv = g.get_operation_by_name(scope + '/Conv2D_Fold') + folded_conv = g.get_operation_by_name(conv_name + '/Conv2D_Fold') self.assertEqual(folded_conv.type, 'Conv2D') self._AssertInputOpsAre(folded_conv, - [scope + '/mul_fold', inputs.op.name]) - self._AssertOutputGoesToOps(folded_conv, g, [scope + '/post_conv_mul']) + [conv_name + '/mul_fold', inputs.op.name]) + self._AssertOutputGoesToOps(folded_conv, g, [conv_name + '/post_conv_mul']) - folded_add = g.get_operation_by_name(scope + '/add_fold') + folded_add = g.get_operation_by_name(conv_name + '/add_fold') self.assertEqual(folded_add.type, 'Add') self._AssertInputOpsAre(folded_add, [ - scope + '/correction_add', - self._BathNormBiasName(scope, fused_batch_norm) + conv_name + '/correction_add', + self._BathNormBiasName(conv_name, fused_batch_norm) ]) output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name] self._AssertOutputGoesToOps(folded_add, g, output_op_names) if freeze_batch_norm_delay is not None: - self._AssertMovingAveragesAreFrozen(g, scope) - + self._AssertMovingAveragesAreFrozen(g, name) for op in g.get_operations(): self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) @@ -143,7 +174,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): relu_op_name='Relu', has_scaling=True, fused_batch_norm=False, - freeze_batch_norm_delay=None): + freeze_batch_norm_delay=None, + insert_identity_node=False): """Tests folding cases for a network with multiple 
layers. Args: @@ -153,6 +185,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fused_batch_norm: Bool, when true the batch norm is fused. freeze_batch_norm_delay: None or the number of steps after which training switches to using frozen mean and variance + insert_identity_node: Bool, insert identity node between conv and batch + norm """ g = ops.Graph() with g.as_default(): @@ -225,9 +259,14 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): for op in g.get_operations(): self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) - def _TestFoldConv2dUnknownShape(self, relu, relu_op_name, with_bypass, - has_scaling, fused_batch_norm, - freeze_batch_norm_delay): + def _TestFoldConv2dUnknownShape(self, + relu, + relu_op_name, + with_bypass, + has_scaling, + fused_batch_norm, + freeze_batch_norm_delay, + insert_identity_node=False): """Tests folding cases: inputs -> Conv2d with batch norm -> Relu*. Tests that folding works even with an input shape where some dimensions are @@ -242,6 +281,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fused_batch_norm: Bool, when true the batch norm is fused. freeze_batch_norm_delay: None or the number of steps after which training switches to using frozen mean and variance + insert_identity_node: Bool, insert identity node between conv and batch + norm """ g = ops.Graph() with g.as_default(): @@ -298,9 +339,9 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): def testFoldConv2dUnknownShape(self): self._RunTestOverParameters(self._TestFoldConv2dUnknownShape) - def _TestFoldFullyConnectedLayer(self, relu, relu_op_name, with_bypass, - has_scaling, fused_batch_norm, - freeze_batch_norm_delay): + def _TestFoldFullyConnectedLayer( + self, relu, relu_op_name, with_bypass, has_scaling, fused_batch_norm, + freeze_batch_norm_delay, insert_identity_node): """Tests folding cases: inputs -> FC with batch norm -> Relu*. 
Args: @@ -312,6 +353,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fused_batch_norm: Bool, when true the batch norm is fused. freeze_batch_norm_delay: None or the number of steps after which training switches to using frozen mean and variance + insert_identity_node: Bool, insert identity node between conv and batch + norm """ g = ops.Graph() with g.as_default(): @@ -319,16 +362,40 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): inputs = array_ops.zeros((batch_size, depth)) out_depth = 256 if with_bypass else 128 activation_fn = None if with_bypass else relu - scope = 'test/test2' if with_bypass else 'test' - node = fully_connected( - inputs, - out_depth, - weights_initializer=self._WeightInit(0.03), - activation_fn=activation_fn, - normalizer_fn=batch_norm, - normalizer_params=self._BatchNormParams( - scale=has_scaling, fused=fused_batch_norm), - scope=scope) + name = 'test/test2' if with_bypass else 'test' + insert_identity_node = fused_batch_norm + if insert_identity_node: + with g.name_scope(name): + node = fully_connected( + inputs, + out_depth, + weights_initializer=self._WeightInit(0.03), + activation_fn=None, + normalizer_fn=None, + biases_initializer=None) + node = array_ops.identity(node, name='fc_out') + + node = batch_norm( + node, + center=True, + scale=has_scaling, + decay=1.0 - 0.003, + fused=fused_batch_norm) + if activation_fn is not None: + node = activation_fn(node) + fc_name = name + '/fully_connected' + else: + + node = fully_connected( + inputs, + out_depth, + weights_initializer=self._WeightInit(0.03), + activation_fn=activation_fn, + normalizer_fn=batch_norm, + normalizer_params=self._BatchNormParams( + scale=has_scaling, fused=fused_batch_norm), + scope=name) + fc_name = name if with_bypass: node = math_ops.add(inputs, node, name='test/Add') relu(node, name='test/' + relu_op_name) @@ -336,30 +403,30 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fold_batch_norms.FoldBatchNorms( g, is_training=True, 
freeze_batch_norm_delay=freeze_batch_norm_delay) - folded_mul = g.get_operation_by_name(scope + '/mul_fold') + folded_mul = g.get_operation_by_name(fc_name + '/mul_fold') self.assertEqual(folded_mul.type, 'Mul') self._AssertInputOpsAre(folded_mul, [ - scope + '/correction_mult', - self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm) + fc_name + '/correction_mult', + self._BatchNormMultiplierName(fc_name, has_scaling, fused_batch_norm) ]) - self._AssertOutputGoesToOps(folded_mul, g, [scope + '/MatMul_Fold']) + self._AssertOutputGoesToOps(folded_mul, g, [fc_name + '/MatMul_Fold']) - folded_conv = g.get_operation_by_name(scope + '/MatMul_Fold') + folded_conv = g.get_operation_by_name(fc_name + '/MatMul_Fold') self.assertEqual(folded_conv.type, 'MatMul') self._AssertInputOpsAre(folded_conv, - [scope + '/mul_fold', inputs.op.name]) - self._AssertOutputGoesToOps(folded_conv, g, [scope + '/post_conv_mul']) + [fc_name + '/mul_fold', inputs.op.name]) + self._AssertOutputGoesToOps(folded_conv, g, [fc_name + '/post_conv_mul']) - folded_add = g.get_operation_by_name(scope + '/add_fold') + folded_add = g.get_operation_by_name(fc_name + '/add_fold') self.assertEqual(folded_add.type, 'Add') self._AssertInputOpsAre(folded_add, [ - scope + '/correction_add', - self._BathNormBiasName(scope, fused_batch_norm) + fc_name + '/correction_add', + self._BathNormBiasName(fc_name, fused_batch_norm) ]) output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name] self._AssertOutputGoesToOps(folded_add, g, output_op_names) if freeze_batch_norm_delay is not None: - self._AssertMovingAveragesAreFrozen(g, scope) + self._AssertMovingAveragesAreFrozen(g, name) for op in g.get_operations(): self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) @@ -369,7 +436,7 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): def _TestFoldDepthwiseConv2d(self, relu, relu_op_name, with_bypass, has_scaling, fused_batch_norm, - freeze_batch_norm_delay): + 
freeze_batch_norm_delay, insert_identity_node): """Tests folding: inputs -> DepthwiseConv2d with batch norm -> Relu*. Args: @@ -380,7 +447,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): has_scaling: Bool, when true the batch norm has scaling. fused_batch_norm: Bool, when true the batch norm is fused. freeze_batch_norm_delay: None or the number of steps after which training - switches to using frozen mean and variance + insert_identity_node: Bool, insert identity node between conv and batch + norm switches to using frozen mean and variance """ g = ops.Graph() with g.as_default(): @@ -388,19 +456,44 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): inputs = array_ops.zeros((batch_size, height, width, 3)) stride = 1 if with_bypass else 2 activation_fn = None if with_bypass else relu - scope = 'test/test2' if with_bypass else 'test' - node = separable_conv2d( - inputs, - None, [5, 5], - stride=stride, - depth_multiplier=1.0, - padding='SAME', - weights_initializer=self._WeightInit(0.09), - activation_fn=activation_fn, - normalizer_fn=batch_norm, - normalizer_params=self._BatchNormParams( - scale=has_scaling, fused=fused_batch_norm), - scope=scope) + name = 'test/test2' if with_bypass else 'test' + if insert_identity_node: + with g.name_scope(name): + node = separable_conv2d( + inputs, + None, [5, 5], + stride=stride, + depth_multiplier=1.0, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=None, + normalizer_fn=None, + biases_initializer=None) + node = array_ops.identity(node, name='sep_conv_out') + + node = batch_norm( + node, + center=True, + scale=has_scaling, + decay=1.0 - 0.003, + fused=fused_batch_norm) + if activation_fn is not None: + node = activation_fn(node) + sep_conv_name = name + '/SeparableConv2d' + else: + node = separable_conv2d( + inputs, + None, [5, 5], + stride=stride, + depth_multiplier=1.0, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=activation_fn, + 
normalizer_fn=batch_norm, + normalizer_params=self._BatchNormParams( + scale=has_scaling, fused=fused_batch_norm), + scope=name) + sep_conv_name = name if with_bypass: node = math_ops.add(inputs, node, name='test/Add') relu(node, name='test/' + relu_op_name) @@ -408,40 +501,43 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fold_batch_norms.FoldBatchNorms( g, is_training=True, freeze_batch_norm_delay=freeze_batch_norm_delay) - folded_mul = g.get_operation_by_name(scope + '/mul_fold') + folded_mul = g.get_operation_by_name(sep_conv_name + '/mul_fold') self.assertEqual(folded_mul.type, 'Mul') if fused_batch_norm: - scale_reshape_op_name = scope + '/BatchNorm_Fold/scale_reshape' + scale_reshape_op_name = sep_conv_name + '/BatchNorm_Fold/scale_reshape' else: - scale_reshape_op_name = scope + '/scale_reshape' - self._AssertInputOpsAre(folded_mul, - [scope + '/correction_mult', scale_reshape_op_name]) - self._AssertOutputGoesToOps(folded_mul, g, [scope + '/depthwise_Fold']) + scale_reshape_op_name = sep_conv_name + '/scale_reshape' + self._AssertInputOpsAre( + folded_mul, [sep_conv_name + '/correction_mult', scale_reshape_op_name]) + self._AssertOutputGoesToOps(folded_mul, g, + [sep_conv_name + '/depthwise_Fold']) scale_reshape = g.get_operation_by_name(scale_reshape_op_name) self.assertEqual(scale_reshape.type, 'Reshape') self._AssertInputOpsAre(scale_reshape, [ - self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm), + self._BatchNormMultiplierName(sep_conv_name, has_scaling, + fused_batch_norm), scale_reshape_op_name + '/shape' ]) - self._AssertOutputGoesToOps(scale_reshape, g, [scope + '/mul_fold']) + self._AssertOutputGoesToOps(scale_reshape, g, [sep_conv_name + '/mul_fold']) - folded_conv = g.get_operation_by_name(scope + '/depthwise_Fold') + folded_conv = g.get_operation_by_name(sep_conv_name + '/depthwise_Fold') self.assertEqual(folded_conv.type, 'DepthwiseConv2dNative') self._AssertInputOpsAre(folded_conv, - [scope + '/mul_fold', 
inputs.op.name]) - self._AssertOutputGoesToOps(folded_conv, g, [scope + '/post_conv_mul']) + [sep_conv_name + '/mul_fold', inputs.op.name]) + self._AssertOutputGoesToOps(folded_conv, g, + [sep_conv_name + '/post_conv_mul']) - folded_add = g.get_operation_by_name(scope + '/add_fold') + folded_add = g.get_operation_by_name(sep_conv_name + '/add_fold') self.assertEqual(folded_add.type, 'Add') self._AssertInputOpsAre(folded_add, [ - scope + '/correction_add', - self._BathNormBiasName(scope, fused_batch_norm) + sep_conv_name + '/correction_add', + self._BathNormBiasName(sep_conv_name, fused_batch_norm) ]) output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name] self._AssertOutputGoesToOps(folded_add, g, output_op_names) if freeze_batch_norm_delay is not None: - self._AssertMovingAveragesAreFrozen(g, scope) + self._AssertMovingAveragesAreFrozen(g, name) for op in g.get_operations(): self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) @@ -450,7 +546,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): self._RunTestOverParameters(self._TestFoldDepthwiseConv2d) def _TestFoldAtrousConv2d(self, relu, relu_op_name, with_bypass, has_scaling, - fused_batch_norm, freeze_batch_norm_delay): + fused_batch_norm, freeze_batch_norm_delay, + insert_identity_node): """Tests folding: inputs -> AtrousConv2d with batch norm -> Relu*. Args: @@ -461,7 +558,9 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): has_scaling: Bool, when true the batch norm has scaling. fused_batch_norm: Bool, when true the batch norm is fused. 
freeze_batch_norm_delay: None or the number of steps after which training - switches to using frozen mean and variance + switches to using frozen mean and variance + insert_identity_node: Bool, insert identity node between conv and batch + norm """ g = ops.Graph() with g.as_default(): @@ -469,19 +568,44 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): inputs = array_ops.zeros((batch_size, height, width, 3)) dilation_rate = 2 activation_fn = None if with_bypass else relu - scope = 'test/test2' if with_bypass else 'test' - node = separable_conv2d( - inputs, - None, [3, 3], - rate=dilation_rate, - depth_multiplier=1.0, - padding='SAME', - weights_initializer=self._WeightInit(0.09), - activation_fn=activation_fn, - normalizer_fn=batch_norm, - normalizer_params=self._BatchNormParams( - scale=has_scaling, fused=fused_batch_norm), - scope=scope) + name = 'test/test2' if with_bypass else 'test' + if insert_identity_node: + with g.name_scope(name): + node = separable_conv2d( + inputs, + None, [3, 3], + rate=dilation_rate, + depth_multiplier=1.0, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=None, + normalizer_fn=None, + biases_initializer=None) + node = array_ops.identity(node, name='sep_conv_out') + + node = batch_norm( + node, + center=True, + scale=has_scaling, + decay=1.0 - 0.003, + fused=fused_batch_norm) + if activation_fn is not None: + node = activation_fn(node) + sep_conv_name = name + '/SeparableConv2d' + else: + node = separable_conv2d( + inputs, + None, [3, 3], + rate=dilation_rate, + depth_multiplier=1.0, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=activation_fn, + normalizer_fn=batch_norm, + normalizer_params=self._BatchNormParams( + scale=has_scaling, fused=fused_batch_norm), + scope=name) + sep_conv_name = name if with_bypass: node = math_ops.add(inputs, node, name='test/Add') relu(node, name='test/' + relu_op_name) @@ -489,45 +613,48 @@ class 
FoldBatchNormsTest(test_util.TensorFlowTestCase): fold_batch_norms.FoldBatchNorms( g, is_training=True, freeze_batch_norm_delay=freeze_batch_norm_delay) - folded_mul = g.get_operation_by_name(scope + '/mul_fold') + folded_mul = g.get_operation_by_name(sep_conv_name + '/mul_fold') self.assertEqual(folded_mul.type, 'Mul') if fused_batch_norm: - scale_reshape_op_name = scope + '/BatchNorm_Fold/scale_reshape' + scale_reshape_op_name = sep_conv_name + '/BatchNorm_Fold/scale_reshape' else: - scale_reshape_op_name = scope + '/scale_reshape' - self._AssertInputOpsAre(folded_mul, - [scope + '/correction_mult', scale_reshape_op_name]) - self._AssertOutputGoesToOps(folded_mul, g, [scope + '/depthwise_Fold']) + scale_reshape_op_name = sep_conv_name + '/scale_reshape' + self._AssertInputOpsAre( + folded_mul, [sep_conv_name + '/correction_mult', scale_reshape_op_name]) + self._AssertOutputGoesToOps(folded_mul, g, + [sep_conv_name + '/depthwise_Fold']) scale_reshape = g.get_operation_by_name(scale_reshape_op_name) self.assertEqual(scale_reshape.type, 'Reshape') self._AssertInputOpsAre(scale_reshape, [ - self._BatchNormMultiplierName(scope, has_scaling, fused_batch_norm), + self._BatchNormMultiplierName(sep_conv_name, has_scaling, + fused_batch_norm), scale_reshape_op_name + '/shape' ]) - self._AssertOutputGoesToOps(scale_reshape, g, [scope + '/mul_fold']) + self._AssertOutputGoesToOps(scale_reshape, g, [sep_conv_name + '/mul_fold']) - folded_conv = g.get_operation_by_name(scope + '/depthwise_Fold') + folded_conv = g.get_operation_by_name(sep_conv_name + '/depthwise_Fold') self.assertEqual(folded_conv.type, 'DepthwiseConv2dNative') - self._AssertInputOpsAre( - folded_conv, [scope + '/mul_fold', scope + '/depthwise/SpaceToBatchND']) + self._AssertInputOpsAre(folded_conv, [ + sep_conv_name + '/mul_fold', sep_conv_name + '/depthwise/SpaceToBatchND' + ]) if fused_batch_norm: self._AssertOutputGoesToOps(folded_conv, g, - [scope + '/BatchToSpaceND_Fold']) + [sep_conv_name + 
'/BatchToSpaceND_Fold']) else: - self._AssertOutputGoesToOps(folded_conv, g, - [scope + '/depthwise/BatchToSpaceND_Fold']) + self._AssertOutputGoesToOps( + folded_conv, g, [sep_conv_name + '/depthwise/BatchToSpaceND_Fold']) - folded_add = g.get_operation_by_name(scope + '/add_fold') + folded_add = g.get_operation_by_name(sep_conv_name + '/add_fold') self.assertEqual(folded_add.type, 'Add') self._AssertInputOpsAre(folded_add, [ - scope + '/correction_add', - self._BathNormBiasName(scope, fused_batch_norm) + sep_conv_name + '/correction_add', + self._BathNormBiasName(sep_conv_name, fused_batch_norm) ]) output_op_names = ['test/Add' if with_bypass else 'test/' + relu_op_name] self._AssertOutputGoesToOps(folded_add, g, output_op_names) if freeze_batch_norm_delay is not None: - self._AssertMovingAveragesAreFrozen(g, scope) + self._AssertMovingAveragesAreFrozen(g, name) for op in g.get_operations(): self.assertFalse('//' in op.name, 'Double slash in op %s' % op.name) @@ -535,9 +662,14 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): def testFoldAtrousConv2d(self): self._RunTestOverParameters(self._TestFoldAtrousConv2d) - def _TestCompareFoldAndUnfolded(self, relu, relu_op_name, with_bypass, - has_scaling, fused_batch_norm, - freeze_batch_norm_delay): + def _TestCompareFoldAndUnfolded(self, + relu, + relu_op_name, + with_bypass, + has_scaling, + fused_batch_norm, + freeze_batch_norm_delay, + insert_identity_node=False): """Tests that running folded and unfolded BN returns the same results. Args: @@ -549,6 +681,8 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): fused_batch_norm: Bool, when true the batch norm is fused. 
freeze_batch_norm_delay: None or the number of steps after which training switches to using frozen mean and variance + insert_identity_node: Bool, insert identity node between conv and batch + norm """ random_seed.set_random_seed(1234) unfolded_g = ops.Graph() diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py index d9dc7fa62e11b47e639664f23b08bbeb9ff8bde2..6f659347fba019288361dd0420f2ade6dc1bebaf 100644 --- a/tensorflow/contrib/quantize/python/quant_ops.py +++ b/tensorflow/contrib/quantize/python/quant_ops.py @@ -49,7 +49,7 @@ def _ModelVariable(name, collections=None, trainable=None): collections = list(collections or []) - collections += [ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.MODEL_VARIABLES] + collections += [ops.GraphKeys.GLOBAL_VARIABLES] return variable_scope.get_variable( name, shape=shape, @@ -62,12 +62,13 @@ def LastValueQuantize(inputs, per_channel=False, init_min=-6.0, init_max=6.0, - vars_collection=ops.GraphKeys.MOVING_AVERAGE_VARIABLES, + vars_collection=None, name_prefix='LastValueQuant', reuse=None, is_training=True, num_bits=8, - narrow_range=False): + narrow_range=False, + symmetric=False): """Adds a layer that collects quantization ranges as last input ranges. LastValueQuantize creates variables called 'min' and 'max', representing the @@ -88,6 +89,8 @@ def LastValueQuantize(inputs, num_bits: Number of bits to use for quantization, must be between 2 and 8. narrow_range: Whether to use the narrow quantization range [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1]. + symmetric: If true, use symmetric quantization limits instead of training + the minimum and maximum of each quantization range separately. Returns: a tensor containing quantized values. 
""" @@ -104,17 +107,18 @@ def LastValueQuantize(inputs, else: min_max_shape = [] + vars_collections = [vars_collection] if vars_collection else [] min_var = _ModelVariable( 'min', shape=min_max_shape, initializer=init_ops.constant_initializer(init_min), - collections=[vars_collection], + collections=vars_collections, trainable=False) max_var = _ModelVariable( 'max', shape=min_max_shape, initializer=init_ops.constant_initializer(init_max), - collections=[vars_collection], + collections=vars_collections, trainable=False) if not is_training: return _FakeQuantWithMinMaxVars( @@ -139,9 +143,6 @@ def LastValueQuantize(inputs, batch_min = inputs else: batch_min = math_ops.reduce_min(inputs, name='BatchMin') - # TFLite requires that 0.0 if always in the [min; max] range. - batch_min = math_ops.minimum(batch_min, 0.0) - assign_min = state_ops.assign(min_var, batch_min, name='AssignMinLast') if per_channel: if input_dim >= 2: @@ -151,9 +152,26 @@ def LastValueQuantize(inputs, batch_max = inputs else: batch_max = math_ops.reduce_max(inputs, name='BatchMax') - # TFLite requires that 0.0 if always in the [min; max] range. - batch_max = math_ops.maximum(batch_max, 0.0) - assign_max = state_ops.assign(max_var, batch_max, name='AssignMaxLast') + + if symmetric: + if narrow_range: + min_max_ratio = -1 + else: + # In two's complement notation, the negative range is slightly larger + # than the positive range. + min_max_ratio = -((1 << num_bits) - 2) / (1 << num_bits) + + # TFLite requires that 0.0 if always in the [min; max] range. Because + # batch_min <= batch_max, it follows that range_min <= 0 <= range_max. + range_min = math_ops.minimum(batch_min, batch_max / min_max_ratio) + range_max = math_ops.maximum(batch_max, batch_min * min_max_ratio) + else: + # TFLite requires that 0.0 if always in the [min; max] range. 
+ range_min = math_ops.minimum(batch_min, 0.0) + range_max = math_ops.maximum(batch_max, 0.0) + + assign_min = state_ops.assign(min_var, range_min, name='AssignMinLast') + assign_max = state_ops.assign(max_var, range_max, name='AssignMaxLast') return _FakeQuantWithMinMaxVars( inputs, @@ -174,7 +192,8 @@ def MovingAvgQuantize(inputs, reuse=None, is_training=True, num_bits=8, - narrow_range=False): + narrow_range=False, + symmetric=False): """Adds a layer that collects quantization ranges as EMAs of input ranges. MovingAvgQuantize creates variables called 'min' and 'max', representing the @@ -196,6 +215,8 @@ def MovingAvgQuantize(inputs, num_bits: Number of bits to use for quantization, must be between 2 and 8. narrow_range: Whether to use the narrow quantization range [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1]. + symmetric: If true, use symmetric quantization limits instead of training + the minimum and maximum of each quantization range separately. Returns: a tensor containing quantized values. """ @@ -212,17 +233,18 @@ def MovingAvgQuantize(inputs, else: min_max_shape = [] + vars_collections = [vars_collection] if vars_collection else [] min_var = _ModelVariable( 'min', shape=min_max_shape, initializer=init_ops.constant_initializer(init_min), - collections=[vars_collection], + collections=vars_collections, trainable=False) max_var = _ModelVariable( 'max', shape=min_max_shape, initializer=init_ops.constant_initializer(init_max), - collections=[vars_collection], + collections=vars_collections, trainable=False) if not is_training: return _FakeQuantWithMinMaxVars( @@ -246,10 +268,6 @@ def MovingAvgQuantize(inputs, batch_min = inputs else: batch_min = math_ops.reduce_min(inputs, name='BatchMin') - # B-eng requires that 0.0 if always in the [min; max] range. 
- batch_min = math_ops.minimum(batch_min, 0.0) - assign_min = moving_averages.assign_moving_average( - min_var, batch_min, ema_decay, name='AssignMinEma') if per_channel: if input_dim >= 2: @@ -259,10 +277,28 @@ def MovingAvgQuantize(inputs, batch_max = inputs else: batch_max = math_ops.reduce_max(inputs, name='BatchMax') - # B-eng requires that 0.0 if always in the [min; max] range. - batch_max = math_ops.maximum(batch_max, 0.0) + + if symmetric: + if narrow_range: + min_max_ratio = -1 + else: + # In two's complement notation, the negative range is slightly larger + # than the positive range. + min_max_ratio = -((1 << num_bits) - 2) / (1 << num_bits) + + # TFLite requires that 0.0 if always in the [min; max] range. Because + # batch_min <= batch_max, it follows that range_min <= 0 <= range_max. + range_min = math_ops.minimum(batch_min, batch_max / min_max_ratio) + range_max = math_ops.maximum(batch_max, batch_min * min_max_ratio) + else: + # TFLite requires that 0.0 if always in the [min; max] range. 
+ range_min = math_ops.minimum(batch_min, 0.0) + range_max = math_ops.maximum(batch_max, 0.0) + + assign_min = moving_averages.assign_moving_average( + min_var, range_min, ema_decay, name='AssignMinEma') assign_max = moving_averages.assign_moving_average( - max_var, batch_max, ema_decay, name='AssignMaxEma') + max_var, range_max, ema_decay, name='AssignMaxEma') return _FakeQuantWithMinMaxVars( inputs, diff --git a/tensorflow/contrib/quantize/python/quant_ops_test.py b/tensorflow/contrib/quantize/python/quant_ops_test.py index a45840009b758881c14fb64b2d39af6cd4ec4bc4..36d2af94e059cdc75b758bbf607d26c4e1ee73e9 100644 --- a/tensorflow/contrib/quantize/python/quant_ops_test.py +++ b/tensorflow/contrib/quantize/python/quant_ops_test.py @@ -29,51 +29,55 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import googletest _MIN_MAX_VARS = 'min_max_vars' +_SYMMETRIC_RANGE_RATIO = 0.9921875 # 127 / 128 class QuantOpsTest(googletest.TestCase): def testLastValueQuantizeTrainingAssign(self): - g = ops.Graph() - with session.Session(graph=g) as sess: - x = array_ops.placeholder(dtypes.float32, shape=[2]) - y = quant_ops.LastValueQuantize( - x, - init_min=0.0, - init_max=0.0, - is_training=True, - vars_collection=_MIN_MAX_VARS) + min_value, max_value = self._GetMinMaxValues(quant_ops.LastValueQuantize, + [[-1, 1]]) + self.assertEqual(min_value, -1.0) + self.assertEqual(max_value, 1.0) - # Run the step. - sess.run(variables.global_variables_initializer()) - sess.run(y, feed_dict={x: [-1.0, 1.0]}) - # Now check that the min_max_vars were, in fact, updated. 
- min_value, max_value = self._GetMinMaxValues(sess) - self.assertEqual(min_value, -1.0) - self.assertEqual(max_value, 1.0) + def testLastValueSymmetricQuantizeTrainingAssign(self): + min_value, max_value = self._GetMinMaxValues( + quant_ops.LastValueQuantize, + [[-_SYMMETRIC_RANGE_RATIO, _SYMMETRIC_RANGE_RATIO]], + symmetric=True, + narrow_range=False) + self.assertEqual(min_value, -1.0) + self.assertEqual(max_value, _SYMMETRIC_RANGE_RATIO) + + def testLastValueSymmetricQuantizeNarrowRangeTrainingAssign(self): + min_value, max_value = self._GetMinMaxValues( + quant_ops.LastValueQuantize, [[-1, 0.5]], + symmetric=True, + narrow_range=True) + self.assertEqual(min_value, -1.0) + self.assertEqual(max_value, 1) def testMovingAvgQuantizeTrainingAssign(self): - g = ops.Graph() - with session.Session(graph=g) as sess: - x = array_ops.placeholder(dtypes.float32, shape=[2]) - y = quant_ops.MovingAvgQuantize( - x, - init_min=0.0, - init_max=0.0, - is_training=True, - vars_collection=_MIN_MAX_VARS) + min_value, max_value = self._GetMinMaxValues(quant_ops.MovingAvgQuantize, + [[-1, 1], [0, 0]]) + self.assertAlmostEqual(min_value, -0.5, delta=1e-3) + self.assertAlmostEqual(max_value, 0.5, delta=1e-3) - # Run the step. - sess.run(variables.global_variables_initializer()) - # Do two runs to avoid zero debias. - sess.run(y, feed_dict={x: [-1.0, 1.0]}) - sess.run(y, feed_dict={x: [0.0, 0.0]}) - # Now check that the min_max_vars were, in fact, updated. 
- min_value, max_value = self._GetMinMaxValues(sess) - self.assertGreater(min_value, -1.0) - self.assertLess(min_value, 0.0) - self.assertGreater(max_value, 0.0) - self.assertLess(max_value, 1.0) + def testMovingAvgSymmetricQuantizeTrainingAssign(self): + min_value, max_value = self._GetMinMaxValues( + quant_ops.MovingAvgQuantize, [[-1, 0.5], [0, 0]], symmetric=True) + self.assertAlmostEqual(min_value, -0.5, delta=1e-3) + self.assertAlmostEqual(max_value, 0.5 * _SYMMETRIC_RANGE_RATIO, delta=1e-3) + self.assertAlmostEqual(max_value, min_value * -_SYMMETRIC_RANGE_RATIO) + + def testMovingAvgSymmetricQuantizeNarrowRangeTrainingAssign(self): + min_value, max_value = self._GetMinMaxValues( + quant_ops.MovingAvgQuantize, [[-1, 0.5], [0, 0]], + symmetric=True, + narrow_range=True) + self.assertAlmostEqual(min_value, -0.5, delta=1e-3) + self.assertAlmostEqual(max_value, 0.5, delta=1e-3) + self.assertAlmostEqual(max_value, -min_value) def testVariablesNotPartitioned_LastValue(self): # Variables added should not use a default partiioner since they are @@ -105,14 +109,31 @@ class QuantOpsTest(googletest.TestCase): is_training=True, vars_collection=_MIN_MAX_VARS) - def _GetMinMaxValues(self, sess): - min_max_vars = ops.get_collection(_MIN_MAX_VARS) - self.assertEqual(len(min_max_vars), 2) - min_idx = 0 if 'min' in min_max_vars[0].name else 1 - max_idx = (min_idx + 1) % 2 - min_var, max_var = min_max_vars[min_idx], min_max_vars[max_idx] - min_max_values = sess.run([min_var, max_var]) - return min_max_values[0], min_max_values[1] + def _GetMinMaxValues(self, quantize_fn, input_values, **kwds): + g = ops.Graph() + with session.Session(graph=g) as sess: + x = array_ops.placeholder(dtypes.float32, shape=[2]) + y = quantize_fn( + x, + init_min=0.0, + init_max=0.0, + is_training=True, + vars_collection=_MIN_MAX_VARS, + **kwds) + + # Run the step. 
+ sess.run(variables.global_variables_initializer()) + for input_elem in input_values: + sess.run(y, feed_dict={x: input_elem}) + + # Now check that the min_max_vars were, in fact, updated. + min_max_vars = ops.get_collection(_MIN_MAX_VARS) + self.assertEqual(len(min_max_vars), 2) + min_idx = 0 if 'min' in min_max_vars[0].name else 1 + max_idx = (min_idx + 1) % 2 + min_var, max_var = min_max_vars[min_idx], min_max_vars[max_idx] + min_max_values = sess.run([min_var, max_var]) + return min_max_values[0], min_max_values[1] if __name__ == '__main__': diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 5e63d33db85a511f33afa1f22647aba755b85cbf..92ca3f203954414159954f7f5d220f95b17967d0 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -32,13 +32,22 @@ from tensorflow.python.platform import tf_logging as logging _QUANTIZABLE_TYPES = {'Conv2D', 'MatMul', 'DepthwiseConv2dNative'} # Activations that are supported by the quantization rewrite. -_ACTIVATION_TYPES = {'Relu', 'Relu6'} +_ACTIVATION_TYPES = {'Relu', 'Relu6', 'Identity'} + +_RELU_TYPES = {'Relu', 'Relu6'} + +_QUANTIZATION_OP = {'FakeQuantWithMinMaxVars'} +_VALID_SRC_OP = {'Add', 'Mul'} +_INTERMEDIATE_OP = {'Add', 'Mul'} +_PASS_THROUGH_OP = {'Reshape', 'Identity', 'BatchToSpaceND', 'SpaceToBatchND'} +_VALID_ACTIVATION_OP = {'Relu', 'Relu6'} def Quantize(graph, is_training, weight_bits=8, activation_bits=8, + symmetric=False, ema_decay=0.999, quant_delay=None, vars_collection=ops.GraphKeys.GLOBAL_VARIABLES, @@ -56,6 +65,8 @@ def Quantize(graph, is_training: Whether quantizing training graph or eval graph. weight_bits: Number of bits to use for quantizing weights. activation_bits: Number of bits to use for quantizing activations. + symmetric: (Optional) If true, use symmetric quantization limits instead of + training the minimum and maximum of each quantization range separately. 
ema_decay: (Optional) Float, EMA decay parameter. EMA is used to update quantization intervals for quantizing activations (see here about EMA: https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average). @@ -73,6 +84,7 @@ def Quantize(graph, scope += '/' input_to_ops_map = input_to_ops.InputToOps(graph) + quantized_ops = set() for layer_match in _FindLayersToQuantize(graph): # Quantize the weights. context = _GetContextFromOp(layer_match.layer_op) @@ -82,7 +94,8 @@ def Quantize(graph, _InsertQuantOp( context, 'weights_quant', - layer_match.weight_tensor.op, [layer_match.layer_op], + layer_match.weight_tensor.op, + input_to_ops_map.ConsumerOperations(layer_match.weight_tensor.op), is_training, moving_avg=False, ema_decay=ema_decay, @@ -90,6 +103,7 @@ def Quantize(graph, narrow_range=True, vars_collection=vars_collection, bits=weight_bits, + symmetric=symmetric, consumer_scope=scope) # Quantize the activations. @@ -115,8 +129,10 @@ def Quantize(graph, quant_delay=quant_delay, vars_collection=vars_collection, bits=activation_bits, + symmetric=symmetric, init_min=0.0, producer_scope=scope) + quantized_ops.add(layer_match.activation_op) # Quantize the inputs and output to the bypass (if it exists). The input to # the bypass is the bias add, and the output is the activation. @@ -126,15 +142,18 @@ def Quantize(graph, _InsertQuantOp( context, 'conv_quant', - layer_match.bias_add_op, [layer_match.bypass_op], + layer_match.bias_add_op, + input_to_ops_map.ConsumerOperations(layer_match.bias_add_op), is_training, moving_avg=True, ema_decay=ema_decay, quant_delay=quant_delay, vars_collection=vars_collection, bits=activation_bits, + symmetric=symmetric, producer_scope=scope, consumer_scope=scope) + quantized_ops.add(layer_match.bias_add_op) # Make sure the op following this isn't an activation. In which case, we # shouldn't quantize it, since the activation will be Fused into the # Add at inference time. 
@@ -154,8 +173,10 @@ def Quantize(graph, quant_delay=quant_delay, vars_collection=vars_collection, bits=activation_bits, + symmetric=symmetric, producer_scope=scope, consumer_scope=scope) + quantized_ops.add(layer_match.bypass_op) # Quantize bypass ops that occur after the activation. if layer_match.post_activation_bypass_op is not None: @@ -172,7 +193,7 @@ def Quantize(graph, # Add at inference time. consumers = input_to_ops_map.ConsumerOperations( layer_match.post_activation_bypass_op) - if any([consumer.type in _ACTIVATION_TYPES for consumer in consumers]): + if any([consumer.type in _RELU_TYPES for consumer in consumers]): logging.info('Skipping %s, because its followed by an activation.', layer_match.post_activation_bypass_op.name) else: @@ -187,7 +208,117 @@ def Quantize(graph, quant_delay=quant_delay, vars_collection=vars_collection, bits=activation_bits, + symmetric=symmetric, producer_scope=scope) + quantized_ops.add(layer_match.post_activation_bypass_op) + + _QuantizeActivationLayers( + quantized_ops, + graph, + is_training, + activation_bits, + ema_decay, + quant_delay, + vars_collection, + scope=scope) + + +def _QuantizeActivationLayers(quantized_ops, + graph, + is_training, + activation_bits=8, + ema_decay=0.999, + quant_delay=None, + vars_collection=ops.GraphKeys.GLOBAL_VARIABLES, + scope=None): + """Quantize intermediate activation tensors after addition and multiplication. + + Args: + quantized_ops: Set of previously quantized activation ops. + graph: Graph to modify. + is_training: Whether quantizing training graph or eval graph. + activation_bits: Number of bits to use for quantizing activations. + ema_decay: (Optional) Float, EMA decay parameter. EMA is used to update + quantization intervals for quantizing activations (see here about EMA: + https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average). + quant_delay: (Optional, default None) Int, count of global steps for which + to delay quantization. 
This helps weights stabilize at the start of + training. + vars_collection: (Optional) Collection where to store the variables for + quantization interval ends. + scope: The scope to be transformed. If it's not None, only the ops which are + in this scope will be transformed. + + Raises: + ValueError: When quantization fails. + """ + input_to_ops_map = input_to_ops.InputToOps(graph) + for op in (op for op in graph.get_operations()): + if _CheckIfQuantizableOp(op, quantized_ops): + logging.info('Inserting fake quant op activation_%s_quant after %s', + op.type, op.name) + consumers = input_to_ops_map.ConsumerOperations(op) + _InsertQuantOp( + op.name, + 'activation_' + op.type + '_quant', + op, + consumers, + is_training, + moving_avg=True, + ema_decay=ema_decay, + quant_delay=quant_delay, + vars_collection=vars_collection, + bits=activation_bits, + producer_scope=scope) + + +def _CheckIfQuantizableOp(src_op, quantized_ops): + """Check if the output of an op should be quantized. + + Args: + src_op: op to be checked + quantized_ops: Set of previously quantized activation ops. + + Returns: + Boolean specifying if output should be quantized or not. 
+ """ + src_op_name = set([src_op.type]) + if src_op in quantized_ops: + return False + if not src_op_name.intersection(_VALID_SRC_OP): + return False + + # If src op is an add or a mul and the output is immediately + # followed by an activation skip + if len(src_op.outputs) == 1 and len(src_op.outputs[0].consumers()) == 1: + op_consumers = src_op.outputs[0].consumers() + if set([op_consumers[0].type]).intersection(_VALID_ACTIVATION_OP): + logging.info('Skipping quant after %s', src_op.name) + return False + # Is an Add or a Mul + input_ops = src_op.inputs + + for op in input_ops: + curr_op = op.op + curr_op_type = set([curr_op.type]) + while curr_op_type.intersection(_PASS_THROUGH_OP): + # Walk back through pass through ops + curr_op = curr_op.inputs[0].op + curr_op_type = set([curr_op.type]) + # Now at a valid or quantizable op, need to check if + # atleast one of the inputs to a valid op is connected + # to a quantizable op via pass through ops + + if (curr_op_type.intersection(_QUANTIZATION_OP) or + curr_op.name.find('delayed_quant/Merge') > 0): + return True + + if curr_op_type.intersection(_INTERMEDIATE_OP): + # Check if atleast one input to intermediate_op are quantizable + for input_op in curr_op.inputs: + if _CheckIfQuantizableOp(input_op.op, quantized_ops): + return True + return False def _FindLayersToQuantize(graph): @@ -384,10 +515,11 @@ def _FindLayersToQuantize(graph): bias_add_op = match_result.get_op(folded_bias_add_pattern) bypass_op = match_result.get_op(bypass_pattern) if layer_op not in matched_layer_set: - matched_layer_set.add(layer_op) - layer_matches.append( - _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op, None, - bias_add_op)) + if not _IsSkipLayer(activation_op): + matched_layer_set.add(layer_op) + layer_matches.append( + _LayerMatch(layer_op, weight_tensor, activation_op, bypass_op, None, + bias_add_op)) # Match the final layer, where there may not be an activation and instead # the output of the final BiasAdd must be 
quantized. So we treat the BiasAdd @@ -424,6 +556,32 @@ def _FindLayersToQuantize(graph): return layer_matches +def _IsSkipLayer(activation_op): + """Skip quantizing conv->identity->Batch norm layers. + + Args: + activation_op: Activation op detected by layer matching pattern + + Returns: + skip_layer: boolean, true when conv->identity->batch norm is detected. + """ + + # Exclude quantization of conv->identity->BN, + # After folding, this part corresponds to estimation of mean and variance + # and should not be quantized. + skip_layer = False + if activation_op.type == 'Identity' and len(activation_op.outputs) == 1: + if len(activation_op.outputs[0].consumers()) == 1: + consumer = activation_op.outputs[0].consumers()[0] + if consumer.type == 'FusedBatchNorm': + skip_layer = True + logging.info( + 'Skipping quantizing %s, because it is the output of a conv/fc ' + 'followed by a identity, feeding a fused batch norm.', + activation_op.name) + return skip_layer + + class _LayerMatch(object): """Contains all information related to a matched Layer.""" @@ -488,6 +646,7 @@ def _InsertQuantOp(context, init_min=-6.0, init_max=6.0, bits=8, + symmetric=False, ema_decay=0.999, quant_delay=None, vars_collection=ops.GraphKeys.GLOBAL_VARIABLES, @@ -508,6 +667,8 @@ def _InsertQuantOp(context, init_min: Starting minimum value for the new quantization op. init_max: Starting maximum value for the new quantization op. bits: Number of bits to use for quantization, must be between 2 and 8. + symmetric: (Optional) If true, use symmetric quantization limits instead of + training the minimum and maximum of each quantization range separately. ema_decay: (Optional) Float, EMA decay parameter. EMA is used to update quantization intervals for quantizing activations (see here about EMA: https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average). 
@@ -571,6 +732,7 @@ def _InsertQuantOp(context, ema_decay=ema_decay, is_training=is_training, num_bits=bits, + symmetric=symmetric, narrow_range=narrow_range, vars_collection=vars_collection, name_prefix=name_prefix)) @@ -582,6 +744,7 @@ def _InsertQuantOp(context, init_max=init_max, is_training=is_training, num_bits=bits, + symmetric=symmetric, narrow_range=narrow_range, vars_collection=vars_collection, name_prefix=name_prefix)) diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py index 484493f1b2a64ae68b16a03ac74e75a5e84bb3de..2a256a3c51cbf91d34c2639bf1adb74deffa2fed 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph.py +++ b/tensorflow/contrib/quantize/python/quantize_graph.py @@ -27,6 +27,7 @@ def _create_graph(input_graph=None, is_training=True, weight_bits=8, activation_bits=8, + symmetric=False, quant_delay=None, freeze_bn_delay=None, scope=None): @@ -43,6 +44,8 @@ def _create_graph(input_graph=None, is_training: Whether quantizing training or eval graph. weight_bits: Number of bits to use for quantizing weights. activation_bits: Number of bits to use for quantizing activations. + symmetric: If true, use symmetric quantization limits instead of training + the minimum and maximum of each quantization range separately. quant_delay: Number of steps after which weights and activations are quantized during training. freeze_bn_delay: Number of steps after which moving mean and variance are @@ -74,6 +77,7 @@ def _create_graph(input_graph=None, quant_delay=quant_delay, weight_bits=weight_bits, activation_bits=activation_bits, + symmetric=symmetric, scope=scope) @@ -142,6 +146,7 @@ def create_eval_graph(input_graph=None): def experimental_create_training_graph(input_graph=None, weight_bits=8, activation_bits=8, + symmetric=False, quant_delay=0, freeze_bn_delay=None, scope=None): @@ -173,6 +178,8 @@ def experimental_create_training_graph(input_graph=None, default graph. 
weight_bits: Number of bits to use for quantizing weights. activation_bits: Number of bits to use for quantizing activations. + symmetric: If true, use symmetric quantization limits instead of training + the minimum and maximum of each quantization range separately. quant_delay: Number of steps after which weights and activations are quantized during training. freeze_bn_delay: Number of steps after which moving mean and variance are @@ -192,6 +199,7 @@ def experimental_create_training_graph(input_graph=None, is_training=True, weight_bits=weight_bits, activation_bits=activation_bits, + symmetric=symmetric, quant_delay=quant_delay, freeze_bn_delay=freeze_bn_delay, scope=scope) @@ -200,6 +208,7 @@ def experimental_create_training_graph(input_graph=None, def experimental_create_eval_graph(input_graph=None, weight_bits=8, activation_bits=8, + symmetric=False, quant_delay=None, scope=None): """Rewrites an eval input_graph in place for simulated quantization. @@ -219,6 +228,8 @@ def experimental_create_eval_graph(input_graph=None, default graph. weight_bits: Number of bits to use for quantizing weights. activation_bits: Number of bits to use for quantizing activations. + symmetric: If true, use symmetric quantization limits instead of training + the minimum and maximum of each quantization range separately. quant_delay: Number of steps after which weights and activations are quantized during eval. scope: The scope to be transformed. 
If it's not None, only the ops which @@ -233,6 +244,7 @@ def experimental_create_eval_graph(input_graph=None, is_training=False, weight_bits=weight_bits, activation_bits=activation_bits, + symmetric=symmetric, quant_delay=quant_delay, scope=scope) diff --git a/tensorflow/contrib/quantize/python/quantize_graph_test.py b/tensorflow/contrib/quantize/python/quantize_graph_test.py index e80d2183a69096f1148160126b025dbaacbcb137..9aa6e2c24d44c9c81f72cc6d1cfc7c9c4e3e15e5 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph_test.py +++ b/tensorflow/contrib/quantize/python/quantize_graph_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools + from tensorflow.contrib.layers.python.layers import layers from tensorflow.contrib.quantize.python import quantize_graph from tensorflow.python import training @@ -27,6 +29,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import template from tensorflow.python.platform import googletest @@ -48,6 +51,8 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): rewrite_fns = [ quantize_graph.create_training_graph, quantize_graph.experimental_create_training_graph, + functools.partial( + quantize_graph.experimental_create_training_graph, symmetric=True), ] for fn in rewrite_fns: test_fn(fn) @@ -56,6 +61,8 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): rewrite_fns = [ quantize_graph.create_eval_graph, quantize_graph.experimental_create_eval_graph, + functools.partial( + quantize_graph.experimental_create_eval_graph, symmetric=True), ] for fn in rewrite_fns: test_fn(fn) @@ -267,6 +274,51 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): graph_def_after = str(g.as_graph_def()) self.assertEqual(graph_def_before, graph_def_after) + def testIdentityNode(self): + 
self._RunTestOverAllRewrites(self._TestIdentityNode) + + def _TestIdentityNode(self, rewrite_fn): + graph = ops.Graph() + with graph.as_default(): + self._LayerWithIdentity() + + rewrite_fn(graph) + op_names = [op.name for op in graph.get_operations()] + self.assertTrue(any('test/Conv/weights_quant' in name for name in op_names)) + self.assertTrue(any('test/Conv/act_quant' in name for name in op_names)) + bn_out_identity = graph.get_operation_by_name('test/bn_out') + self._AssertInputOpsAre(bn_out_identity, [ + 'test/Conv/add_fold', + ]) + + conv_out_identity = graph.get_operation_by_name('test/conv_out') + self._AssertOutputGoesToOps(conv_out_identity, graph, + ['test/BatchNorm/FusedBatchNorm']) + + def testActivationQuantization(self): + self._RunTestOverAllRewrites(self._TestActivationQuantization) + + def _TestActivationQuantization(self, rewrite_fn): + graph = ops.Graph() + with graph.as_default(): + _ = self._LayerWithActivationProcessing() + + rewrite_fn(graph) + # Check if outputs of multipliers and adds are quantized. + + mul_op = graph.get_operation_by_name('test/Mul') + self._AssertOutputGoesToOps( + mul_op, graph, + ['test/Mul/activation_Mul_quant/FakeQuantWithMinMaxVars']) + mul_op = graph.get_operation_by_name('test/Mul_1') + self._AssertOutputGoesToOps( + mul_op, graph, + ['test/Mul_1/activation_Mul_quant/FakeQuantWithMinMaxVars']) + add_op = graph.get_operation_by_name('test/add') + self._AssertOutputGoesToOps( + add_op, graph, + ['test/add/activation_Add_quant/FakeQuantWithMinMaxVars']) + def testRewriteWithScope(self): self._RunTestOverExperimentalRewritesWithScope( self._TestRewriteWithScope, 'scope1') @@ -306,6 +358,82 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): # No ops should be inserted or removed. 
self.assertEqual(op_names_before_rewrite, op_names_after_rewrite) + def testActivationRewriteWithScope(self): + self._RunTestOverExperimentalRewritesWithScope( + self._TestActivationRewriteWithScope, 'scope1') + + def _TestActivationRewriteWithScope(self, rewrite_fn): + graph = ops.Graph() + with graph.as_default(): + output = self._LayerWithIdentity(scope='scope1') + with ops.name_scope('scope2'): + output = nn_ops.relu6(output) + scaled_output1 = math_ops.mul(2.0, output) + scaled_output2 = math_ops.mul(3.0, output) + output = scaled_output1 + scaled_output2 + rewrite_fn(graph) + + op_names = [op.name for op in graph.get_operations()] + # The weights and activation of scope1 is quantized, but not scope2. + self.assertTrue(any('scope1/Conv/act_quant' in name for name in op_names)) + self.assertTrue( + any('scope1/Conv/weights_quant' in name for name in op_names)) + + for op_name in op_names: + if op_name.startswith('scope2'): + self.assertTrue('FakeQuant' not in op_name) + + def testActivationRewriteWithNonMatchingScope(self): + self._RunTestOverExperimentalRewritesWithScope( + self._TestActivationRewriteWithNonMatchingScope, 'NonExistingScope') + + def _TestActivationRewriteWithNonMatchingScope(self, rewrite_fn): + graph = ops.Graph() + with graph.as_default(): + self._LayerWithActivationProcessing() + + rewrite_fn(graph) + op_types_after_rewrite = set([op.type for op in graph.get_operations()]) + self.assertFalse( + op_types_after_rewrite.intersection('FakeQuantWithMinMaxVars')) + # No fake quant ops should be inserted. 
+ + def testWithSharedWeights(self): + + self._RunTestOverAllRewrites(self._TestWithSharedWeights) + self._RunTestOverTrainingRewrites(self._TestRewriteWithSharedWeights) + + def _TestRewriteWithSharedWeights(self, rewrite_fn, quant_delay=1): + self._TestWithSharedWeights(rewrite_fn, quant_delay) + + def _TestWithSharedWeights(self, rewrite_fn, quant_delay=None): + with ops.Graph().as_default() as g: + conv = template.make_template('shared_weights_conv', self._ConvLayer) + conv() + conv() + if quant_delay is None: + rewrite_fn() + else: + rewrite_fn(quant_delay=quant_delay) + + conv_ops = [op for op in g.get_operations() if op.type == 'Conv2D'] + weights_quants = [ + op for op in g.get_operations() + if 'weights_quant' in op.name and op.type == 'FakeQuantWithMinMaxVars' + ] + # Check that the shared weights variable is not quantized multiple times + self.assertTrue(len(weights_quants) == 1) + weights_quant_tensor = weights_quants[0].outputs[0] + if quant_delay: + delayed_weights_quants = [ + op for op in g.get_operations() + if 'weights_quant' in op.name and op.type == 'Merge' + ] + self.assertTrue(len(delayed_weights_quants) == 1) + weights_quant_tensor = delayed_weights_quants[0].outputs[0] + # Check that the Conv2D operations get the quantized weights + self.assertTrue(all(weights_quant_tensor in op.inputs for op in conv_ops)) + def _ConvLayer( self, input_tensor=None, scope='test', pre_activation_bypass=False, post_activation_bypass=False): @@ -328,6 +456,85 @@ class QuantizeGraphTest(test_util.TensorFlowTestCase): output += input_tensor return output + def _LayerWithIdentity(self, + input_tensor=None, + scope='test', + post_activation_bypass=False): + """Add a basic conv, identity, batch norm with skip to the default graph.""" + batch_size, height, width, depth = 5, 128, 128, 3 + if input_tensor is None: + input_tensor = array_ops.zeros((batch_size, height, width, depth)) + weight_init = init_ops.truncated_normal_initializer + with ops.name_scope(scope): + 
output = layers.conv2d( + input_tensor, + depth, [5, 5], + padding='SAME', + weights_initializer=weight_init(0.09), + activation_fn=None, + normalizer_fn=None, + biases_initializer=None) + output = array_ops.identity(output, name='conv_out') + + output = layers.batch_norm( + output, center=True, scale=True, decay=1.0 - 0.003, fused=True) + + output = array_ops.identity(output, name='bn_out') + if post_activation_bypass: + output += input_tensor + return output + + def _LayerWithActivationProcessing(self, + input_tensor=None, + scope='test', + post_activation_bypass=False): + + batch_size, height, width, depth = 5, 128, 128, 3 + if input_tensor is None: + input_tensor = array_ops.zeros((batch_size, height, width, depth)) + weight_init = init_ops.truncated_normal_initializer + with ops.name_scope(scope): + output = layers.conv2d( + input_tensor, + depth, [5, 5], + padding='SAME', + weights_initializer=weight_init(0.09), + activation_fn=None, + normalizer_fn=None, + biases_initializer=None) + + output = layers.batch_norm( + output, center=True, scale=True, decay=1.0 - 0.003, fused=True) + + output = nn_ops.relu6(output) + scaled_output1 = math_ops.mul(2.0, output) + scaled_output2 = math_ops.mul(3.0, output) + output = scaled_output1 + scaled_output2 + return output + + def _AssertInputOpsAre(self, op, in_op_names): + """Asserts that all inputs to op come from in_op_names (disregarding order). + + Args: + op: Operation to check inputs for. + in_op_names: List of strings, operations where all op's inputs should come + from. + """ + expected_inputs = [in_op_name + ':0' for in_op_name in in_op_names] + self.assertItemsEqual([t.name for t in op.inputs], expected_inputs) + + def _AssertOutputGoesToOps(self, op, graph, out_op_names): + """Asserts that outputs from op go to out_op_names (and perhaps others). + + Args: + op: Operation to check outputs for. + graph: Graph where output operations are located. 
+ out_op_names: List of strings, operations where op's outputs should go. + """ + for out_op_name in out_op_names: + out_op = graph.get_operation_by_name(out_op_name) + self.assertIn(op.outputs[0].name, [str(t.name) for t in out_op.inputs]) + if __name__ == '__main__': googletest.main() diff --git a/tensorflow/contrib/rate/rate.py b/tensorflow/contrib/rate/rate.py index 24d586479a61631461e41bda507f95a3c167f754..d948066b36426e73171d5efa4c4ed0d84c9e3341 100644 --- a/tensorflow/contrib/rate/rate.py +++ b/tensorflow/contrib/rate/rate.py @@ -108,13 +108,6 @@ class Rate(object): def variables(self): return self._vars - def _safe_div(self, numerator, denominator, name): - t = math_ops.truediv(numerator, denominator) - zero = array_ops.zeros_like(t, dtype=denominator.dtype) - condition = math_ops.greater(denominator, zero) - zero = math_ops.cast(zero, t.dtype) - return array_ops.where(condition, t, zero, name=name) - def _add_variable(self, name, shape=None, dtype=None): """Private method for adding variables to the graph.""" if self._built: @@ -148,4 +141,6 @@ class Rate(object): state_ops.assign(self.prev_values, values) state_ops.assign(self.prev_denominator, denominator) - return self._safe_div(self.numer, self.denom, name="safe_rate") + return math_ops.div_no_nan(self.numer, + math_ops.maximum(self.denom, 0), + name="safe_rate") diff --git a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.cc b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.cc index 2def4f3f176b8d4d26c2c94168e9698f14649d94..edcef3adeaa22a5b5b1da47ee3f17dc04b737199 100644 --- a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.cc +++ b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.cc @@ -30,6 +30,11 @@ using thread::ThreadPool; namespace functor { +#define Sum(a, b) ((a) + (b)) +#define Prod(a, b) ((a) * (b)) +#define Max(a, b) ((a) > (b) ? (a) : (b)) +#define Min(a, b) ((a) < (b) ? 
(a) : (b)) + #define CPUReduceSliceFunctorReduceop(reduceop, beginning) \ template \ struct ReduceSliceFunctor##reduceop { \ @@ -234,6 +239,11 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_GPU_REDUCE_SLICE_KERNELS_ALL); #undef REGISTER_GPU_REDUCE_SLICE_KERNELS #undef REGISTER_GPU_REDUCE_SLICE_KERNELS_ALL +#undef Sum +#undef Prod +#undef Min +#undef Max + #endif // GOOGLE_CUDA } // namespace tensorflow diff --git a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.h b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.h index 69ef521c0120104e23bdb844539282a3bcea3525..12bff1e9161783d897ea1d5bb3327c5d0f92f652 100644 --- a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.h +++ b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops.h @@ -21,11 +21,6 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" -#define Sum(a, b) ((a) + (b)) -#define Prod(a, b) ((a) * (b)) -#define Max(a, b) ((a) > (b) ? (a) : (b)) -#define Min(a, b) ((a) < (b) ? (a) : (b)) - namespace tensorflow { class OpKernelContext; diff --git a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc index 9f2be03d718364058da6b63add8752c046798c5b..204b83f7f5f118f418815edb6c482b1c06673845 100644 --- a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc +++ b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc @@ -29,6 +29,11 @@ using GPUDevice = Eigen::GpuDevice; namespace functor { +#define Sum(a, b) ((a) + (b)) +#define Prod(a, b) ((a) * (b)) +#define Max(a, b) ((a) > (b) ? (a) : (b)) +#define Min(a, b) ((a) < (b) ? 
(a) : (b)) + #define GPUReduceSliceFunctorReduceop(reduceop, beginning) \ template \ __global__ void ReduceSliceDeviceKernel##reduceop( \ @@ -94,6 +99,11 @@ TF_CALL_REAL_NUMBER_TYPES(DEFINE_GPU_SPECS) #undef DEFINE_GPU_REDUCEOP_SPECS_INDEX #undef DEFINE_GPU_SPECS +#undef Sum +#undef Prod +#undef Min +#undef Max + } // namespace functor } // namespace tensorflow diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.cc b/tensorflow/contrib/rnn/kernels/blas_gemm.cc index 45d22b739b8c597c7ebda85968aa44cd599a798c..56ec86418de51d7aa5f02e75b17fa88a91d5a2a9 100644 --- a/tensorflow/contrib/rnn/kernels/blas_gemm.cc +++ b/tensorflow/contrib/rnn/kernels/blas_gemm.cc @@ -38,8 +38,9 @@ namespace functor { template void TensorCuBlasGemm::operator()(OpKernelContext* ctx, bool transa, bool transb, uint64 m, uint64 n, uint64 k, - T alpha, const T* a, int lda, const T* b, - int ldb, T beta, T* c, int ldc) { + float alpha, const T* a, int lda, + const T* b, int ldb, float beta, T* c, + int ldc) { #if GOOGLE_CUDA se::blas::Transpose trans[] = {se::blas::Transpose::kNoTranspose, se::blas::Transpose::kTranspose}; @@ -60,8 +61,8 @@ void TensorCuBlasGemm::operator()(OpKernelContext* ctx, bool transa, #endif } +template struct TensorCuBlasGemm; template struct TensorCuBlasGemm; -template struct TensorCuBlasGemm; } // end namespace functor } // end namespace tensorflow diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.h b/tensorflow/contrib/rnn/kernels/blas_gemm.h index a52c934233af3dc63e1a60d70fac6a9eba6a655b..9535a76566748eaf8b4756ad0dc26218262ed990 100644 --- a/tensorflow/contrib/rnn/kernels/blas_gemm.h +++ b/tensorflow/contrib/rnn/kernels/blas_gemm.h @@ -28,8 +28,8 @@ namespace functor { template struct TensorCuBlasGemm { void operator()(OpKernelContext* ctx, bool transa, bool transb, uint64 m, - uint64 n, uint64 k, T alpha, const T* a, int lda, const T* b, - int ldb, T beta, T* c, int ldc); + uint64 n, uint64 k, float alpha, const T* a, int lda, + const T* b, int ldb, float beta, T* 
c, int ldc); }; template @@ -38,8 +38,9 @@ struct TensorBlasGemm; template struct TensorBlasGemm { static void compute(OpKernelContext* ctx, const Device& d, bool transa, - bool transb, T alpha, typename TTypes::ConstMatrix a, - typename TTypes::ConstMatrix b, T beta, + bool transb, float alpha, + typename TTypes::ConstMatrix a, + typename TTypes::ConstMatrix b, float beta, typename TTypes::Matrix c) { int64 m = c.dimensions()[0]; int64 n = c.dimensions()[1]; diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.cc b/tensorflow/contrib/rnn/kernels/lstm_ops.cc index 5e7cf0ce84d332bd24088cd78995f7843813328b..ee08d306f84baaba8b774ce3fa1a04d5f9a4f6dd 100644 --- a/tensorflow/contrib/rnn/kernels/lstm_ops.cc +++ b/tensorflow/contrib/rnn/kernels/lstm_ops.cc @@ -44,7 +44,7 @@ namespace functor { template void LSTMBlockCellFpropWithEigen( const LSTMBlockCell& cell, OpKernelContext* ctx, const CPUDevice& d, - const T forget_bias, const T cell_clip, bool use_peephole, + const float forget_bias, const float cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, typename TTypes::ConstMatrix cs_prev, typename TTypes::ConstMatrix h_prev, typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, @@ -177,50 +177,51 @@ void LSTMBlockCellBpropWithEigen( } } -#define DEFINE_CPU_SPECS(T) \ - template <> \ - void LSTMBlockCellFprop::operator()( \ - OpKernelContext* ctx, const CPUDevice& d, const T forget_bias, \ - const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, \ - typename TTypes::ConstMatrix cs_prev, \ - typename TTypes::ConstMatrix h_prev, \ - typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ - typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ - typename TTypes::ConstVec b, typename TTypes::Matrix xh, \ - typename TTypes::Matrix i, typename TTypes::Matrix cs, \ - typename TTypes::Matrix f, typename TTypes::Matrix o, \ - typename TTypes::Matrix ci, typename TTypes::Matrix co, \ - typename 
TTypes::Matrix icfo, typename TTypes::Matrix h) { \ - LSTMBlockCellFpropWithEigen( \ - *this, ctx, d, forget_bias, cell_clip, use_peephole, x, cs_prev, \ - h_prev, w, wci, wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, h); \ - } \ - template <> \ - void LSTMBlockCellBprop::operator()( \ - OpKernelContext* ctx, const CPUDevice& d, bool use_peephole, \ - typename TTypes::ConstMatrix x, \ - typename TTypes::ConstMatrix cs_prev, \ - typename TTypes::ConstMatrix h_prev, \ - typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ - typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ - typename TTypes::ConstVec b, typename TTypes::ConstMatrix i, \ - typename TTypes::ConstMatrix cs, typename TTypes::ConstMatrix f, \ - typename TTypes::ConstMatrix o, typename TTypes::ConstMatrix ci, \ - typename TTypes::ConstMatrix co, \ - typename TTypes::ConstMatrix cs_grad, \ - typename TTypes::ConstMatrix h_grad, typename TTypes::Matrix do_, \ - typename TTypes::Matrix dcs, typename TTypes::Matrix dci, \ - typename TTypes::Matrix df, typename TTypes::Matrix di, \ - typename TTypes::Matrix dicfo, \ - typename TTypes::Matrix cs_prev_grad, \ - typename TTypes::Vec wci_grad, typename TTypes::Vec wcf_grad, \ - typename TTypes::Vec wco_grad) { \ - LSTMBlockCellBpropWithEigen( \ - *this, ctx, d, use_peephole, x, cs_prev, h_prev, w, wci, wcf, wco, b, \ - i, cs, f, o, ci, co, cs_grad, h_grad, do_, dcs, dci, df, di, dicfo, \ - cs_prev_grad, wci_grad, wcf_grad, wco_grad); \ - } \ - template struct LSTMBlockCellFprop; \ +#define DEFINE_CPU_SPECS(T) \ + template <> \ + void LSTMBlockCellFprop::operator()( \ + OpKernelContext* ctx, const CPUDevice& d, const float forget_bias, \ + const float cell_clip, bool use_peephole, \ + typename TTypes::ConstMatrix x, \ + typename TTypes::ConstMatrix cs_prev, \ + typename TTypes::ConstMatrix h_prev, \ + typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ + typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ + typename 
TTypes::ConstVec b, typename TTypes::Matrix xh, \ + typename TTypes::Matrix i, typename TTypes::Matrix cs, \ + typename TTypes::Matrix f, typename TTypes::Matrix o, \ + typename TTypes::Matrix ci, typename TTypes::Matrix co, \ + typename TTypes::Matrix icfo, typename TTypes::Matrix h) { \ + LSTMBlockCellFpropWithEigen( \ + *this, ctx, d, forget_bias, cell_clip, use_peephole, x, cs_prev, \ + h_prev, w, wci, wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, h); \ + } \ + template <> \ + void LSTMBlockCellBprop::operator()( \ + OpKernelContext* ctx, const CPUDevice& d, bool use_peephole, \ + typename TTypes::ConstMatrix x, \ + typename TTypes::ConstMatrix cs_prev, \ + typename TTypes::ConstMatrix h_prev, \ + typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ + typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ + typename TTypes::ConstVec b, typename TTypes::ConstMatrix i, \ + typename TTypes::ConstMatrix cs, typename TTypes::ConstMatrix f, \ + typename TTypes::ConstMatrix o, typename TTypes::ConstMatrix ci, \ + typename TTypes::ConstMatrix co, \ + typename TTypes::ConstMatrix cs_grad, \ + typename TTypes::ConstMatrix h_grad, typename TTypes::Matrix do_, \ + typename TTypes::Matrix dcs, typename TTypes::Matrix dci, \ + typename TTypes::Matrix df, typename TTypes::Matrix di, \ + typename TTypes::Matrix dicfo, \ + typename TTypes::Matrix cs_prev_grad, \ + typename TTypes::Vec wci_grad, typename TTypes::Vec wcf_grad, \ + typename TTypes::Vec wco_grad) { \ + LSTMBlockCellBpropWithEigen( \ + *this, ctx, d, use_peephole, x, cs_prev, h_prev, w, wci, wcf, wco, b, \ + i, cs, f, o, ci, co, cs_grad, h_grad, do_, dcs, dci, df, di, dicfo, \ + cs_prev_grad, wci_grad, wcf_grad, wco_grad); \ + } \ + template struct LSTMBlockCellFprop; \ template struct LSTMBlockCellBprop; DEFINE_CPU_SPECS(float); @@ -377,24 +378,26 @@ REGISTER_KERNEL(float); #if GOOGLE_CUDA namespace functor { -#define DECLARE_GPU_SPEC(T) \ - template <> \ - void LSTMBlockCellFprop::operator()( \ 
- OpKernelContext* ctx, const GPUDevice& d, const T forget_bias, \ - const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, \ - typename TTypes::ConstMatrix cs_prev, \ - typename TTypes::ConstMatrix h_prev, \ - typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ - typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ - typename TTypes::ConstVec b, typename TTypes::Matrix xh, \ - typename TTypes::Matrix i, typename TTypes::Matrix cs, \ - typename TTypes::Matrix f, typename TTypes::Matrix o, \ - typename TTypes::Matrix ci, typename TTypes::Matrix co, \ - typename TTypes::Matrix icfo, typename TTypes::Matrix h); \ - \ +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void LSTMBlockCellFprop::operator()( \ + OpKernelContext* ctx, const GPUDevice& d, const float forget_bias, \ + const float cell_clip, bool use_peephole, \ + typename TTypes::ConstMatrix x, \ + typename TTypes::ConstMatrix cs_prev, \ + typename TTypes::ConstMatrix h_prev, \ + typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ + typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ + typename TTypes::ConstVec b, typename TTypes::Matrix xh, \ + typename TTypes::Matrix i, typename TTypes::Matrix cs, \ + typename TTypes::Matrix f, typename TTypes::Matrix o, \ + typename TTypes::Matrix ci, typename TTypes::Matrix co, \ + typename TTypes::Matrix icfo, typename TTypes::Matrix h); \ + \ extern template struct LSTMBlockCellFprop; DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(Eigen::half); // DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // end namespace functor @@ -405,6 +408,7 @@ DECLARE_GPU_SPEC(float); LSTMBlockCellOp); REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(Eigen::half); // REGISTER_GPU_KERNEL(double); #undef REGISTER_GPU_KERNEL #endif // GOOGLE_CUDA @@ -629,9 +633,9 @@ class LSTMBlockCellGradOp : public OpKernel { const Device& device = ctx->eigen_device(); - functor::TensorZero()(device, wci_grad_tensor->flat()); - 
functor::TensorZero()(device, wcf_grad_tensor->flat()); - functor::TensorZero()(device, wco_grad_tensor->flat()); + functor::TensorZero()(device, wci_grad_tensor->flat()); + functor::TensorZero()(device, wcf_grad_tensor->flat()); + functor::TensorZero()(device, wco_grad_tensor->flat()); functor::LSTMBlockCellBprop(batch_size, input_size, cell_size)( @@ -688,6 +692,7 @@ namespace functor { true /* USE_CUBLAS */>; DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(Eigen::half); // DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // namespace functor @@ -698,6 +703,7 @@ DECLARE_GPU_SPEC(float); LSTMBlockCellGradOp); REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(Eigen::half); // REGISTER_GPU_KERNEL(double); #undef REGISTER_GPU_KERNEL #endif // GOOGLE_CUDA @@ -984,10 +990,10 @@ class BlockLSTMOp : public OpKernel { Tensor cs_tensor = cs_out->Slice(seq_len_max, timelen); Tensor h_tensor = h_out->Slice(seq_len_max, timelen); - functor::TensorUnalignedZero()( - device, cs_tensor.unaligned_flat()); - functor::TensorUnalignedZero()( - device, h_tensor.unaligned_flat()); + functor::TensorUnalignedZero()(device, + cs_tensor.unaligned_flat()); + functor::TensorUnalignedZero()(device, + h_tensor.unaligned_flat()); } } @@ -1021,6 +1027,7 @@ namespace functor { extern template struct TensorUnalignedZero; DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(Eigen::half); // DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // end namespace functor @@ -1033,6 +1040,7 @@ DECLARE_GPU_SPEC(float); BlockLSTMOp); REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(Eigen::half); // REGISTER_GPU_KERNEL(double); #undef REGISTER_GPU_KERNEL #endif // GOOGLE_CUDA @@ -1195,16 +1203,15 @@ class BlockLSTMGradOp : public OpKernel { const Device& device = ctx->eigen_device(); - functor::TensorZero()(device, cs_grad_tensor.flat()); - functor::TensorZero()(device, - cs_prev_grad_tensor->flat()); - functor::TensorZero()(device, h_grad_tensor.flat()); - functor::TensorZero()(device, h_prev_grad_tensor->flat()); - 
functor::TensorZero()(device, w_grad_tensor->flat()); - functor::TensorZero()(device, wci_grad_tensor->flat()); - functor::TensorZero()(device, wcf_grad_tensor->flat()); - functor::TensorZero()(device, wco_grad_tensor->flat()); - functor::TensorZero()(device, b_grad_tensor->flat()); + functor::TensorZero()(device, cs_grad_tensor.flat()); + functor::TensorZero()(device, cs_prev_grad_tensor->flat()); + functor::TensorZero()(device, h_grad_tensor.flat()); + functor::TensorZero()(device, h_prev_grad_tensor->flat()); + functor::TensorZero()(device, w_grad_tensor->flat()); + functor::TensorZero()(device, wci_grad_tensor->flat()); + functor::TensorZero()(device, wcf_grad_tensor->flat()); + functor::TensorZero()(device, wco_grad_tensor->flat()); + functor::TensorZero()(device, b_grad_tensor->flat()); const int64 seq_len_max = seq_len_max_tensor->scalar()(); SliceHelper slicer(ctx); @@ -1331,6 +1338,7 @@ namespace functor { extern template struct BlockLSTMBprop; DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(Eigen::half); // DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // end namespace functor @@ -1343,6 +1351,7 @@ DECLARE_GPU_SPEC(float); BlockLSTMGradOp); REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(Eigen::half); // REGISTER_GPU_KERNEL(double); #undef REGISTER_GPU_KERNEL #endif // GOOGLE_CUDA diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.h b/tensorflow/contrib/rnn/kernels/lstm_ops.h index d23cedc234b8c0e1a784346f28164ae79b8cbf89..5ca1dad6552cff857d33232d8197fe069036841a 100644 --- a/tensorflow/contrib/rnn/kernels/lstm_ops.h +++ b/tensorflow/contrib/rnn/kernels/lstm_ops.h @@ -77,8 +77,7 @@ template struct TensorZeroPadding { void operator()(const Device& d, const int64 time_idx, typename TTypes::ConstVec seq_len, - typename TTypes::Vec mask, - typename TTypes::Matrix m) { + typename TTypes::Vec mask, typename TTypes::Matrix m) { // mask is shape [batch_size]. 
mask.device(d) = seq_len.constant(time_idx) < seq_len; @@ -154,18 +153,21 @@ struct LSTMBlockCellFprop : public LSTMBlockCell { const int cell_size) : LSTMBlockCell(batch_size, input_size, cell_size) {} - void operator()( - OpKernelContext* ctx, const Device& d, const T forget_bias, - const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, - typename TTypes::ConstMatrix cs_prev, - typename TTypes::ConstMatrix h_prev, typename TTypes::ConstMatrix w, - typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, - typename TTypes::ConstVec wco, typename TTypes::ConstVec b, - typename TTypes::Matrix xh, typename TTypes::Matrix i, - typename TTypes::Matrix cs, typename TTypes::Matrix f, - typename TTypes::Matrix o, typename TTypes::Matrix ci, - typename TTypes::Matrix co, typename TTypes::Matrix icfo, - typename TTypes::Matrix h); + void operator()(OpKernelContext* ctx, const Device& d, + const float forget_bias, const float cell_clip, + bool use_peephole, typename TTypes::ConstMatrix x, + typename TTypes::ConstMatrix cs_prev, + typename TTypes::ConstMatrix h_prev, + typename TTypes::ConstMatrix w, + typename TTypes::ConstVec wci, + typename TTypes::ConstVec wcf, + typename TTypes::ConstVec wco, + typename TTypes::ConstVec b, typename TTypes::Matrix xh, + typename TTypes::Matrix i, typename TTypes::Matrix cs, + typename TTypes::Matrix f, typename TTypes::Matrix o, + typename TTypes::Matrix ci, typename TTypes::Matrix co, + typename TTypes::Matrix icfo, + typename TTypes::Matrix h); }; // See lstm_ops.cc for CPUDevice implementation and lstm_ops_gpu.cu.cc for @@ -261,7 +263,7 @@ struct BlockLSTMBprop : public LSTMBlockCell { typename TTypes::ConstMatrix const_dicfo(dicfo.data(), dicfo.dimensions()); TensorBlasGemm::compute( - ctx, d, false, true, T(1), const_dicfo, w, T(0), xh_grad); + ctx, d, false, true, 1.f, const_dicfo, w, 0.f, xh_grad); // xh. 
xh.slice(xh_x_offsets(), xh_x_extents()).device(d) = x; @@ -274,7 +276,7 @@ struct BlockLSTMBprop : public LSTMBlockCell { // w_grad. TensorBlasGemm::compute( - ctx, d, true, false, T(1), const_xh, const_dicfo, T(1), w_grad); + ctx, d, true, false, 1.f, const_xh, const_dicfo, 1.f, w_grad); // b_grad. b_grad.device(d) += dicfo.sum(Eigen::array({0})); diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc b/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc index 6d3758fef15e7130b740a377d8bcd41d31203299..b664b0f45ee08648e4dc10e8244340df1615ad19 100644 --- a/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc +++ b/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc @@ -31,6 +31,49 @@ typedef Eigen::GpuDevice GPUDevice; namespace { +struct FloatToHalf { + __host__ __device__ EIGEN_STRONG_INLINE Eigen::half operator()( + const float& x) const { + return Eigen::half_impl::float_to_half_rtne(x); + } +}; + +template +__host__ __device__ EIGEN_STRONG_INLINE + typename std::enable_if::value, U>::type + strict_cast(T t); + +template +__host__ __device__ EIGEN_STRONG_INLINE + typename std::enable_if::value, U>::type + strict_cast(T t) { + return t; +} + +template <> +__host__ __device__ EIGEN_STRONG_INLINE Eigen::half +strict_cast(float t) { + return FloatToHalf()(t); +} + +} // namespace + +template +struct TensorZero { + void operator()(const GPUDevice& d, typename TTypes::Flat t) { + t.device(d) = t.constant(strict_cast(0.f)); + } +}; + +template +struct TensorUnalignedZero { + void operator()(const GPUDevice& d, typename TTypes::UnalignedFlat t) { + t.device(d) = t.constant(strict_cast(0.f)); + } +}; + +namespace { + // Adds bias, applies non-linearities and gates. 
// // Launch with a 2D setup such that there is one thread per (example, @@ -42,12 +85,15 @@ namespace { template __global__ void lstm_gates(const T* icfo, const T* b, const T* cs_prev, const T* wci, const T* wcf, const T* wco, T* o, T* h, - T* ci, T* cs, T* co, T* i, T* f, const T forget_bias, - const T cell_clip, const int batch_size, - const int cell_size) { + T* ci, T* cs, T* co, T* i, T* f, + const float forget_bias, const float cell_clip, + const int batch_size, const int cell_size) { const int batch_id = blockIdx.x * blockDim.x + threadIdx.x; const int act_id = blockIdx.y * blockDim.y + threadIdx.y; + T forget_bias_t = strict_cast(forget_bias); + T cell_clip_t = strict_cast(cell_clip); + if (batch_id >= batch_size || act_id >= cell_size) return; // The following code assumes the input arrays are of the following @@ -115,16 +161,16 @@ __global__ void lstm_gates(const T* icfo, const T* b, const T* cs_prev, T f_local; if (use_peephole) { f_local = sigmoid_op(icfo[2 * cell_size + gid] + b[2 * cell_size + act_id] + - forget_bias + cs_prev[cid] * wcf[act_id]); + forget_bias_t + cs_prev[cid] * wcf[act_id]); } else { f_local = sigmoid_op(icfo[2 * cell_size + gid] + b[2 * cell_size + act_id] + - forget_bias); + forget_bias_t); } f[cid] = f_local; T cs_local = i_local * ci_local + f_local * cs_prev[cid]; - if (cell_clip > 0.0) { - cs_local = clip_op(cs_local, cell_clip); + if (cell_clip_t > strict_cast(0.0f)) { + cs_local = clip_op(cs_local, cell_clip_t); } cs[cid] = cs_local; @@ -174,8 +220,8 @@ __global__ void concat_xh(T* xh, const T* x, const T* h_prev, template void LSTMBlockCellFpropWithCUDA( - OpKernelContext* ctx, const GPUDevice& d, const T forget_bias, - const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, + OpKernelContext* ctx, const GPUDevice& d, const float forget_bias, + const float cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, typename TTypes::ConstMatrix cs_prev, typename TTypes::ConstMatrix h_prev, typename 
TTypes::ConstMatrix w, typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, @@ -202,7 +248,7 @@ void LSTMBlockCellFpropWithCUDA( // states1 = xh * w typename TTypes::ConstMatrix const_xh(xh.data(), xh.dimensions()); TensorBlasGemm::compute( - ctx, d, false, false, T(1), const_xh, w, T(0), icfo); + ctx, d, false, false, 1.f, const_xh, w, 0.f, icfo); // Add bias, apply non-linearities and gating. // @@ -357,8 +403,9 @@ void LSTMBlockCellBpropWithCUDA( template struct TensorAdd; \ template <> \ void LSTMBlockCellFprop::operator()( \ - OpKernelContext* ctx, const GPUDevice& d, const T forget_bias, \ - const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, \ + OpKernelContext* ctx, const GPUDevice& d, const float forget_bias, \ + const float cell_clip, bool use_peephole, \ + typename TTypes::ConstMatrix x, \ typename TTypes::ConstMatrix cs_prev, \ typename TTypes::ConstMatrix h_prev, \ typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ @@ -368,10 +415,10 @@ void LSTMBlockCellBpropWithCUDA( typename TTypes::Matrix f, typename TTypes::Matrix o, \ typename TTypes::Matrix ci, typename TTypes::Matrix co, \ typename TTypes::Matrix icfo, typename TTypes::Matrix h) { \ - LSTMBlockCellFpropWithCUDA(ctx, d, forget_bias, cell_clip, use_peephole, \ - x, cs_prev, h_prev, w, wci, wcf, wco, b, xh, i, \ - cs, f, o, ci, co, icfo, h, batch_size_, \ - cell_size_, input_size_); \ + LSTMBlockCellFpropWithCUDA(ctx, d, forget_bias, cell_clip, \ + use_peephole, x, cs_prev, h_prev, w, wci, \ + wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, \ + h, batch_size_, cell_size_, input_size_); \ } \ template <> \ void LSTMBlockCellBprop::operator()( \ @@ -403,6 +450,7 @@ void LSTMBlockCellBpropWithCUDA( template struct BlockLSTMBprop; DEFINE_GPU_SPECS(float); +DEFINE_GPU_SPECS(Eigen::half); // DEFINE_GPU_SPECS(double); #undef DEFINE_GPU_SPECS diff --git a/tensorflow/contrib/rnn/ops/lstm_ops.cc b/tensorflow/contrib/rnn/ops/lstm_ops.cc index 
699cc6c88a4634334b2621a7f48cbbeae1dc9a45..1679e355184f0622ba5e82dd9334edab3fe4c6f3 100644 --- a/tensorflow/contrib/rnn/ops/lstm_ops.cc +++ b/tensorflow/contrib/rnn/ops/lstm_ops.cc @@ -41,7 +41,7 @@ REGISTER_OP("LSTMBlockCell") .Attr("forget_bias: float = 1.0") .Attr("cell_clip: float = 3.0") .Attr("use_peephole: bool = false") - .Attr("T: {float}") + .Attr("T: {half, float}") .SetShapeFn([](InferenceContext* c) { ShapeHandle x, cs_prev; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &x)); @@ -128,7 +128,7 @@ REGISTER_OP("LSTMBlockCellGrad") .Output("wcf_grad: T") .Output("wco_grad: T") .Attr("use_peephole: bool") - .Attr("T: {float}") + .Attr("T: {half, float}") .SetShapeFn([](InferenceContext* c) { ShapeHandle x, cs_prev; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &x)); @@ -196,7 +196,7 @@ REGISTER_OP("BlockLSTM") .Attr("forget_bias: float = 1.0") .Attr("cell_clip: float = 3.0") .Attr("use_peephole: bool = false") - .Attr("T: {float}") + .Attr("T: {half, float}") .SetShapeFn([](InferenceContext* c) { ShapeHandle x, b; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &x)); @@ -288,7 +288,7 @@ REGISTER_OP("BlockLSTMGrad") .Output("wco_grad: T") .Output("b_grad: T") .Attr("use_peephole: bool") - .Attr("T: {float}") + .Attr("T: {half, float}") .SetShapeFn([](InferenceContext* c) { ShapeHandle x, cs_prev, h_prev, w, wci, wco, wcf, b; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &x)); diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py index be0306cb0703765984c810bfdae3ad6f1f8441f0..572df58e5228d9beb1489cc217e79c4a032e4cd2 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py @@ -695,7 +695,7 @@ class RNNCellTest(test.TestCase): return gpu_dev = test.gpu_device_name() - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: with 
variable_scope.variable_scope( "root", initializer=init_ops.constant_initializer(0.5)): x = array_ops.zeros([1, 1, 3]) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py index f31ad53d3c4274ca7a9027ea2e3bdea5424bd567..5cba54dd3df5bbb33380505bd5a073f069a3a590 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py @@ -211,7 +211,7 @@ class RNNTest(test.TestCase): self.assertEqual(out.get_shape(), inp.get_shape()) self.assertEqual(out.dtype, inp.dtype) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: input_value = np.random.randn(batch_size, input_size) values = sess.run(outputs + [state], feed_dict={inputs[0]: input_value}) @@ -247,7 +247,7 @@ class RNNTest(test.TestCase): self.assertEqual(out.get_shape().as_list(), inp.get_shape().as_list()) self.assertEqual(out.dtype, inp.dtype) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: input_value = np.random.randn(batch_size, input_size) values = sess.run(outputs + [state], feed_dict={inputs[0]: input_value}) full_dropout_values = sess.run( @@ -274,7 +274,7 @@ class RNNTest(test.TestCase): cell, inputs, sequence_length=sequence_length, dtype=dtypes.float32) self.assertEqual(len(dynamic_outputs), len(inputs)) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: input_value = np.random.randn(batch_size, input_size) dynamic_values = sess.run( dynamic_outputs, @@ -310,7 +310,7 @@ class RNNTest(test.TestCase): 1.0 * (2 + 1) * np.ones((input_size))))) def _testScope(self, factory, prefix="prefix", use_outer_scope=True): - with self.test_session(use_gpu=True, graph=ops_lib.Graph()): + with self.session(use_gpu=True, graph=ops_lib.Graph()): if use_outer_scope: with variable_scope.variable_scope(prefix) as scope: factory(scope) @@ -372,7 +372,7 @@ 
class LSTMTest(test.TestCase): input_size = 5 batch_size = 2 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: initializer = init_ops.random_uniform_initializer( -0.01, 0.01, seed=self._seed) cell = rnn_cell.LSTMCell( @@ -394,7 +394,7 @@ class LSTMTest(test.TestCase): input_size = 5 batch_size = 2 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: initializer = init_ops.random_uniform_initializer( -0.01, 0.01, seed=self._seed) cell = rnn_cell.LSTMCell( @@ -424,7 +424,7 @@ class LSTMTest(test.TestCase): input_size = 5 batch_size = 2 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: initializer = init_ops.random_uniform_initializer( -0.01, 0.01, seed=self._seed) state_saver = TestStateSaver(batch_size, 2 * num_units) @@ -562,7 +562,7 @@ class LSTMTest(test.TestCase): batch_size = 2 num_proj = 4 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: initializer = init_ops.random_uniform_initializer( -0.01, 0.01, seed=self._seed) inputs = max_length * [ @@ -659,7 +659,7 @@ class LSTMTest(test.TestCase): num_proj_shards = 3 num_unit_shards = 2 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: initializer = init_ops.random_uniform_initializer( -0.01, 0.01, seed=self._seed) @@ -692,7 +692,7 @@ class LSTMTest(test.TestCase): num_proj_shards = 3 num_unit_shards = 2 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: initializer = init_ops.random_uniform_initializer(-1, 1, 
seed=self._seed) inputs = max_length * [ array_ops.placeholder(dtypes.float64, shape=(None, input_size)) @@ -728,7 +728,7 @@ class LSTMTest(test.TestCase): num_proj_shards = 3 num_unit_shards = 2 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: inputs = max_length * [ array_ops.placeholder(dtypes.float32, shape=(None, input_size)) ] @@ -784,7 +784,7 @@ class LSTMTest(test.TestCase): num_proj_shards = 3 num_unit_shards = 2 max_length = 8 - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: sequence_length = array_ops.placeholder(dtypes.int64) initializer = init_ops.random_uniform_initializer( -0.01, 0.01, seed=self._seed) @@ -1117,7 +1117,7 @@ class LSTMTest(test.TestCase): state_is_tuple=False) ########### Step 1: Run static graph and generate readouts - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: if in_graph_mode: concat_inputs = array_ops.placeholder( dtypes.float32, shape=(time_steps, batch_size, input_size)) @@ -1177,7 +1177,7 @@ class LSTMTest(test.TestCase): static_individual_variable_gradients, feed_dict=feeds) ########## Step 2: Run dynamic graph and generate readouts - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: if in_graph_mode: concat_inputs = array_ops.placeholder( dtypes.float32, shape=(time_steps, batch_size, input_size)) @@ -1337,7 +1337,7 @@ class BidirectionalRNNTest(test.TestCase): return input_value, inputs, outputs, state_fw, state_bw, sequence_length def _testBidirectionalRNN(self, use_shape): - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: input_value, inputs, outputs, state_fw, state_bw, 
sequence_length = ( self._createBidirectionalRNN(use_shape, True)) variables_lib.global_variables_initializer().run() @@ -1384,7 +1384,7 @@ class BidirectionalRNNTest(test.TestCase): self.assertAllClose(s_fw, s_bw) def _testBidirectionalRNNWithoutSequenceLength(self, use_shape): - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: input_value, inputs, outputs, state_fw, state_bw, _ = ( self._createBidirectionalRNN(use_shape, False)) variables_lib.global_variables_initializer().run() @@ -1472,7 +1472,7 @@ class BidirectionalRNNTest(test.TestCase): def _testBidirectionalDynamicRNN(self, use_shape, use_state_tuple, use_time_major, use_sequence_length): - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: input_value, inputs, outputs, state_fw, state_bw, sequence_length = ( self._createBidirectionalDynamicRNN( use_shape, use_state_tuple, use_time_major, use_sequence_length)) @@ -1549,7 +1549,7 @@ class BidirectionalRNNTest(test.TestCase): # REMARKS: factory(scope) is a function accepting a scope # as an argument, such scope can be None, a string # or a VariableScope instance. 
- with self.test_session(use_gpu=True, graph=ops_lib.Graph()): + with self.session(use_gpu=True, graph=ops_lib.Graph()): if use_outer_scope: with variable_scope.variable_scope(prefix) as scope: factory(scope) @@ -1868,7 +1868,7 @@ class StateSaverRNNTest(test.TestCase): batch_size = 2 state_saver = TestStateSaver(batch_size, 2 * num_units) - with self.test_session(use_gpu=True, graph=ops_lib.Graph()): + with self.session(use_gpu=True, graph=ops_lib.Graph()): if use_outer_scope: with variable_scope.variable_scope(prefix) as scope: self._factory(scope=scope, state_saver=state_saver) @@ -1945,7 +1945,7 @@ class GRUTest(test.TestCase): sequence_length = np.random.randint(0, time_steps, size=batch_size) - with self.test_session(use_gpu=True, graph=ops_lib.Graph()) as sess: + with self.session(use_gpu=True, graph=ops_lib.Graph()) as sess: concat_inputs = array_ops.placeholder( dtypes.float32, shape=(time_steps, batch_size, input_size)) @@ -1967,7 +1967,7 @@ class GRUTest(test.TestCase): sess.run([outputs_dynamic, state_dynamic], feed_dict=feeds) def _testScope(self, factory, prefix="prefix", use_outer_scope=True): - with self.test_session(use_gpu=True, graph=ops_lib.Graph()): + with self.session(use_gpu=True, graph=ops_lib.Graph()): if use_outer_scope: with variable_scope.variable_scope(prefix) as scope: factory(scope) @@ -2253,7 +2253,7 @@ class RawRNNTest(test.TestCase): np.ones((max_time, batch_size, 1), np.int64), output_vals[1]) def _testScope(self, factory, prefix="prefix", use_outer_scope=True): - with self.test_session(use_gpu=True, graph=ops_lib.Graph()): + with self.session(use_gpu=True, graph=ops_lib.Graph()): if use_outer_scope: with variable_scope.variable_scope(prefix) as scope: factory(scope) @@ -2370,7 +2370,7 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase): sequence_length=sequence_length, dtype=dtypes.float32) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: opts = 
config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE) run_metadata = config_pb2.RunMetadata() variables_lib.global_variables_initializer().run() diff --git a/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py index b865466cc75aa67fcd192f7726f65141409b896a..50d0da6eaf26bbb621907193f2915eaf3c9eadba 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py @@ -41,7 +41,7 @@ from tensorflow.python.training import gradient_descent class GRUBlockCellTest(test.TestCase): def testNoneDimsWithDynamicRNN(self): - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: batch_size = 4 cell_size = 5 input_size = 6 @@ -58,7 +58,7 @@ class GRUBlockCellTest(test.TestCase): sess.run(output, feed) def testBlockGRUToGRUCellSingleStep(self): - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: batch_size = 4 cell_size = 5 input_size = 6 @@ -91,7 +91,7 @@ class GRUBlockCellTest(test.TestCase): self.assertAllClose(block, basic) def testBlockGRUToGRUCellMultiStep(self): - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: batch_size = 2 cell_size = 3 input_size = 3 @@ -150,7 +150,7 @@ class GRUBlockCellTest(test.TestCase): self.assertAllClose(block_res[1], block_res[1]) def testDerivativeOfBlockGRUToGRUCellSingleStep(self): - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: batch_size = 2 cell_size = 3 input_size = 4 @@ -220,7 +220,7 @@ class GRUBlockCellTest(test.TestCase): cell_size = 3 input_size = 4 time_steps = 2 - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, 
graph=ops.Graph()) as sess: # Random initializers. seed = 1994 initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=seed) @@ -287,7 +287,7 @@ class GRUBlockCellTest(test.TestCase): self.assertAllClose(block, basic) def testGradient(self): - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: batch_size = 1 cell_size = 3 input_size = 2 diff --git a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py index ffd24218944e150a32b1b915288ab1df90afb45c..9ce0b399ba173b67285e907a050c71af5d57068c 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py @@ -130,7 +130,7 @@ def blocks_match(sess, use_peephole): class LSTMBlockCellTest(test.TestCase): def testNoneDimsWithDynamicRNN(self): - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: batch_size = 4 num_steps = 5 input_dim = 6 @@ -147,7 +147,7 @@ class LSTMBlockCellTest(test.TestCase): sess.run(output, feed) def testLSTMBlockCell(self): - with self.test_session(use_gpu=True, graph=ops.Graph()) as sess: + with self.session(use_gpu=True, graph=ops.Graph()) as sess: with variable_scope.variable_scope( "root", initializer=init_ops.constant_initializer(0.5)): x = array_ops.zeros([1, 2]) @@ -175,7 +175,7 @@ class LSTMBlockCellTest(test.TestCase): self.assertAllClose(res[4], [[0.24024698, 0.24024698]]) def testCompatibleNames(self): - with self.test_session(use_gpu=True, graph=ops.Graph()): + with self.session(use_gpu=True, graph=ops.Graph()): cell = rnn_cell.LSTMCell(10) pcell = rnn_cell.LSTMCell(10, use_peepholes=True) inputs = [array_ops.zeros([4, 5])] * 6 @@ -186,7 +186,7 @@ class LSTMBlockCellTest(test.TestCase): for v in variables.trainable_variables() } - with self.test_session(use_gpu=True, graph=ops.Graph()): 
+ with self.session(use_gpu=True, graph=ops.Graph()): cell = lstm_ops.LSTMBlockCell(10) pcell = lstm_ops.LSTMBlockCell(10, use_peephole=True) inputs = [array_ops.zeros([4, 5])] * 6 @@ -197,7 +197,7 @@ class LSTMBlockCellTest(test.TestCase): for v in variables.trainable_variables() } - with self.test_session(use_gpu=True, graph=ops.Graph()): + with self.session(use_gpu=True, graph=ops.Graph()): cell = lstm_ops.LSTMBlockFusedCell(10) pcell = lstm_ops.LSTMBlockFusedCell(10, use_peephole=True) inputs = array_ops.stack([array_ops.zeros([4, 5])] * 6) @@ -212,7 +212,7 @@ class LSTMBlockCellTest(test.TestCase): self.assertEqual(basic_names, fused_names) def testLSTMBasicToBlockCell(self): - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: x = array_ops.zeros([1, 2]) x_values = np.random.randn(1, 2) @@ -262,7 +262,7 @@ class LSTMBlockCellTest(test.TestCase): self.assertAllClose(basic, block) def testLSTMBasicToBlockCellPeeping(self): - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: x = array_ops.zeros([1, 2]) x_values = np.random.randn(1, 2) @@ -315,7 +315,7 @@ class LSTMBlockCellTest(test.TestCase): self.assertAllClose(basic, block) def testLSTMBasicToBlock(self): - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: (basic_state, fused_state, basic_outputs, block_outputs, fused_outputs, basic_grads, block_grads, fused_grads, basic_wgrads, block_wgrads, fused_wgrads) = blocks_match( @@ -333,7 +333,7 @@ class LSTMBlockCellTest(test.TestCase): self.assertAllClose(basic, fused, rtol=1e-6, atol=1e-6) def testLSTMBasicToBlockPeeping(self): - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: (basic_state, fused_state, basic_outputs, block_outputs, fused_outputs, basic_grads, block_grads, fused_grads, basic_wgrads, block_wgrads, fused_wgrads) = blocks_match( @@ -352,7 +352,7 @@ class LSTMBlockCellTest(test.TestCase): def 
testLSTMFusedSequenceLengths(self): """Verify proper support for sequence lengths in LSTMBlockFusedCell.""" - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: batch_size = 3 input_size = 4 cell_size = 5 diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py index eda8cb3c12f7b7e3dac9b21347854bda4dcfe729..32df1db964a9467a921e5bf9747d4a7ebc17bb9e 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_test.py @@ -99,7 +99,7 @@ class StackBidirectionalRNNTest(test.TestCase): return input_value, inputs, outputs, state_fw, state_bw, sequence_length def _testStackBidirectionalRNN(self, use_gpu, use_shape): - with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess: + with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess: input_value, inputs, outputs, state_fw, state_bw, sequence_length = ( self._createStackBidirectionalRNN(use_gpu, use_shape, True)) variables.global_variables_initializer().run() @@ -159,7 +159,7 @@ class StackBidirectionalRNNTest(test.TestCase): # - Check that the state_5 and state_5' (forward and backward) are the # same for the first layer (it does not apply for the second layer since # it has forward-backward dependencies). - with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess: + with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess: batch_size = 2 # Create states placeholders. 
initial_states_fw = [ @@ -281,7 +281,7 @@ class StackBidirectionalRNNTest(test.TestCase): def _testStackBidirectionalDynamicRNN(self, use_gpu, use_shape, use_state_tuple): - with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess: + with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess: input_value, inputs, outputs, state_fw, state_bw, sequence_length = ( self._createStackBidirectionalDynamicRNN(use_gpu, use_shape, use_state_tuple)) @@ -343,7 +343,7 @@ class StackBidirectionalRNNTest(test.TestCase): # - Check that the state_5 and state_5' (forward and backward) are the # same for the first layer (it does not apply for the second layer since # it has forward-backward dependencies). - with self.test_session(use_gpu=use_gpu, graph=ops.Graph()) as sess: + with self.session(use_gpu=use_gpu, graph=ops.Graph()) as sess: batch_size = 2 # Create states placeholders. initial_states_fw = [ @@ -414,7 +414,7 @@ class StackBidirectionalRNNTest(test.TestCase): # REMARKS: factory(scope) is a function accepting a scope # as an argument, such scope can be None, a string # or a VariableScope instance. - with self.test_session(use_gpu=True, graph=ops.Graph()): + with self.session(use_gpu=True, graph=ops.Graph()): if use_outer_scope: with variable_scope.variable_scope(prefix) as scope: factory(scope) diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py index 9e61fc54d10c1b75786450060e428c73974760a7..f2975b98061da45895481438aa34d2a6f6901a3a 100644 --- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py +++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py @@ -346,6 +346,7 @@ class LSTMBlockCell(LayerRNNCell): forget_bias=1.0, cell_clip=None, use_peephole=False, + dtype=None, reuse=None, name="lstm_cell"): """Initialize the basic LSTM cell. @@ -355,6 +356,7 @@ class LSTMBlockCell(LayerRNNCell): forget_bias: float, The bias added to forget gates (see above). cell_clip: An optional `float`. Defaults to `-1` (no clipping). 
use_peephole: Whether to use peephole connections or not. + dtype: the variable dtype of this layer. Defaults to tf.float32. reuse: (optional) boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. @@ -366,7 +368,7 @@ class LSTMBlockCell(LayerRNNCell): When restoring from CudnnLSTM-trained checkpoints, must use CudnnCompatibleLSTMBlockCell instead. """ - super(LSTMBlockCell, self).__init__(_reuse=reuse, name=name) + super(LSTMBlockCell, self).__init__(_reuse=reuse, dtype=dtype, name=name) self._num_units = num_units self._forget_bias = forget_bias self._use_peephole = use_peephole @@ -417,7 +419,7 @@ class LSTMBlockCell(LayerRNNCell): wcf = self._w_f_diag wco = self._w_o_diag else: - wci = wcf = wco = array_ops.zeros([self._num_units]) + wci = wcf = wco = array_ops.zeros([self._num_units], dtype=self.dtype) (cs_prev, h_prev) = state (_, cs, _, _, _, _, h) = _lstm_block_cell( @@ -596,23 +598,26 @@ class LSTMBlockFusedCell(LSTMBlockWrapper): cell_clip=None, use_peephole=False, reuse=None, + dtype=None, name="lstm_fused_cell"): """Initialize the LSTM cell. Args: num_units: int, The number of units in the LSTM cell. forget_bias: float, The bias added to forget gates (see above). - cell_clip: clip the cell to this value. Default is no cell clipping. + cell_clip: clip the cell to this value. Defaults to no cell clipping. use_peephole: Whether to use peephole connections or not. reuse: (optional) boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. + dtype: the dtype of variables of this layer. name: String, the name of the layer. Layers with the same name will share weights, but to avoid mistakes we require reuse=True in such cases. By default this is "lstm_cell", for variable-name compatibility with `tf.nn.rnn_cell.LSTMCell`.
""" - super(LSTMBlockFusedCell, self).__init__(_reuse=reuse, name=name) + super(LSTMBlockFusedCell, self).__init__( + _reuse=reuse, name=name, dtype=dtype) self._num_units = num_units self._forget_bias = forget_bias self._cell_clip = cell_clip if cell_clip is not None else -1 diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 78cea8feb4ccee25c2579fa9cb6d04406e80b2f9..0f693e915415598c6b6327e63a10deb60a7f6d27 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -1110,7 +1110,7 @@ _Linear = core_rnn_cell._Linear # pylint: disable=invalid-name class AttentionCellWrapper(rnn_cell_impl.RNNCell): """Basic attention cell wrapper. - Implementation based on https://arxiv.org/abs/1409.0473. + Implementation based on https://arxiv.org/abs/1601.06733. """ def __init__(self, diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py index 1f3b533de9592d1ed3b27980e25911fe94a829f6..c1e36b2ea3677f742f7c699b616def0c0147e063 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/attention_wrapper_test.py @@ -181,7 +181,7 @@ class AttentionWrapperTest(test.TestCase): for creator, depth in zip(create_attention_mechanisms, attention_mechanism_depths)] - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: with vs.variable_scope( 'root', initializer=init_ops.random_normal_initializer(stddev=0.01, seed=3)): @@ -724,7 +724,7 @@ class AttentionWrapperTest(test.TestCase): def testBahdanauMonotonicHard(self): # Run attention mechanism with mode='hard', make sure probabilities are hard b, t, u, d = 10, 20, 30, 40 - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: a = wrapper.BahdanauMonotonicAttention( d, 
random_ops.random_normal((b, t, u)), @@ -898,7 +898,7 @@ class AttentionWrapperTest(test.TestCase): def testLuongMonotonicHard(self): # Run attention mechanism with mode='hard', make sure probabilities are hard b, t, u, d = 10, 20, 30, 40 - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: a = wrapper.LuongMonotonicAttention( d, random_ops.random_normal((b, t, u)), diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_test.py index fa3f074c67ce44e592146f451e7c2fac0617c63a..b7f9f3fb090356a1c8d2bfb5044712ff93e267ce 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/basic_decoder_test.py @@ -50,7 +50,7 @@ class BasicDecoderTest(test.TestCase): cell_depth = 10 output_layer_depth = 3 - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: inputs = np.random.randn(batch_size, max_time, input_depth).astype(np.float32) cell = rnn_cell.LSTMCell(cell_depth) @@ -136,7 +136,7 @@ class BasicDecoderTest(test.TestCase): start_tokens = np.random.randint(0, vocabulary_size, size=batch_size) end_token = 1 - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: embeddings = np.random.randn(vocabulary_size, input_depth).astype(np.float32) cell = rnn_cell.LSTMCell(vocabulary_size) @@ -209,7 +209,7 @@ class BasicDecoderTest(test.TestCase): start_tokens = np.random.randint(0, vocabulary_size, size=batch_size) end_token = 1 - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: with variable_scope.variable_scope( "testStepWithSampleEmbeddingHelper", initializer=init_ops.constant_initializer(0.01)): @@ -278,7 +278,7 @@ class BasicDecoderTest(test.TestCase): input_depth = 7 vocabulary_size = 10 - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) 
as sess: inputs = np.random.randn( batch_size, max_time, input_depth).astype(np.float32) embeddings = np.random.randn( @@ -371,7 +371,7 @@ class BasicDecoderTest(test.TestCase): else: auxiliary_inputs = None - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: inputs = np.random.randn(batch_size, max_time, input_depth).astype(np.float32) cell = rnn_cell.LSTMCell(cell_depth) @@ -523,7 +523,7 @@ class BasicDecoderTest(test.TestCase): lambda x: array_ops.one_hot(x, vocabulary_size, dtype=dtypes.float32)) end_fn = lambda sample_ids: math_ops.equal(sample_ids, end_token) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: with variable_scope.variable_scope( "testStepWithInferenceHelper", initializer=init_ops.constant_initializer(0.01)): @@ -604,7 +604,7 @@ class BasicDecoderTest(test.TestCase): next_inputs_fn = math_ops.to_float end_fn = lambda sample_ids: sample_ids[:, end_token] - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: with variable_scope.variable_scope( "testStepWithInferenceHelper", initializer=init_ops.constant_initializer(0.01)): diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py index 9662a5780a083f41060cfa6624f249ed328d8112..b41734d214e98cd24be0c98ee67f7cb5e58b7a61 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py @@ -49,7 +49,7 @@ class GatherTreeTest(test.TestCase): parent_ids=parent_ids, max_sequence_lengths=max_sequence_lengths, end_token=end_token) - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): self.assertAllEqual(expected_result, beams.eval()) def testBadParentValuesOnCPU(self): @@ -93,7 +93,7 @@ class GatherTreeTest(test.TestCase): parent_ids=parent_ids, 
max_sequence_lengths=max_sequence_lengths, end_token=end_token) - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): self.assertAllEqual(expected_result, beams.eval()) def testGatherTreeBatch(self): @@ -103,7 +103,7 @@ class GatherTreeTest(test.TestCase): max_sequence_lengths = [0, 1, 2, 4, 7, 8, 9, 10, 11, 0] end_token = 5 - with self.test_session(use_gpu=True): + with self.session(use_gpu=True): step_ids = np.random.randint( 0, high=end_token + 1, size=(max_time, batch_size, beam_width)) parent_ids = np.random.randint( diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py index b549cbf568f254cbf18456145af751a8245dd379..4c25489fade320f2f2218354343021a71af01baf 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/decoder_test.py @@ -44,7 +44,7 @@ class DynamicDecodeRNNTest(test.TestCase): cell_depth = 10 max_out = max(sequence_length) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: if time_major: inputs = np.random.randn(max_time, batch_size, input_depth).astype(np.float32) @@ -126,7 +126,7 @@ class DynamicDecodeRNNTest(test.TestCase): cell_depth = 10 max_out = max(sequence_length) - with self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: inputs = np.random.randn(batch_size, max_time, input_depth).astype(np.float32) diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/loss_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/loss_test.py index 35c601a4bcf795ab951218851a3699b3288a69b1..5aa32b532ffcf5772f6ace26662f5e5471cf6923 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/loss_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/loss_test.py @@ -34,7 +34,7 @@ from tensorflow.python.platform import test class LossTest(test.TestCase): def testSequenceLoss(self): - with 
self.test_session(use_gpu=True) as sess: + with self.session(use_gpu=True) as sess: with variable_scope.variable_scope( 'root', initializer=init_ops.constant_initializer(0.5)): batch_size = 2 diff --git a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py index 360e7dbe75f595ff61fb83379089294371203813..7743f5b4a7fbd2d80ff77a130ee50b9ea7e261ee 100644 --- a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py +++ b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_loss_test.py @@ -109,6 +109,42 @@ class SparsemaxLossTest(test.TestCase): np_loss, tf_loss_out, half_atol=1e-2, half_rtol=5e-3) self.assertShapeEqual(np_loss, tf_loss_op) + def _test_sparsemax_loss_of_nan(self, dtype, random, use_gpu): + """check sparsemax-loss transfers nan""" + q = np.asarray([[0, 0, 1], [0, 0, 1], [0, 0, 1]]) + z_nan = np.asarray([[0, np.nan, 0], [0, np.nan, np.nan], + [np.nan, np.nan, np.nan]]).astype(dtype) + + _, tf_loss_nan = self._tf_sparsemax_loss(z_nan, q, dtype, use_gpu) + self.assertAllCloseAccordingToType([np.nan, np.nan, np.nan], tf_loss_nan) + + def _test_sparsemax_loss_of_inf(self, dtype, random, use_gpu): + """check sparsemax-loss is infinity safe""" + q = np.asarray([[0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 1]]) + z_neg = np.asarray([ + [0, -np.inf, 0], + [0, -np.inf, -np.inf], + [-np.inf, -np.inf, 0], + [-np.inf, -np.inf, -np.inf], + ]).astype(dtype) + z_pos = np.asarray([[0, np.inf, 0], [0, np.inf, + np.inf], [np.inf, np.inf, 0], + [np.inf, np.inf, np.inf]]).astype(dtype) + z_mix = np.asarray([[0, np.inf, 0], [0, np.inf, -np.inf], + [-np.inf, np.inf, 0], [-np.inf, np.inf, + -np.inf]]).astype(dtype) + + _, tf_loss_neg = self._tf_sparsemax_loss(z_neg, q, dtype, use_gpu) + self.assertAllCloseAccordingToType([0.25, np.inf, 0, np.nan], tf_loss_neg) + + _, tf_loss_pos = self._tf_sparsemax_loss(z_pos, q, dtype, use_gpu) + 
self.assertAllCloseAccordingToType([np.nan, np.nan, np.nan, np.nan], + tf_loss_pos) + + _, tf_loss_mix = self._tf_sparsemax_loss(z_mix, q, dtype, use_gpu) + self.assertAllCloseAccordingToType([np.nan, np.nan, np.nan, np.nan], + tf_loss_mix) + def _test_constant_add(self, dtype, random, use_gpu): """check sparsemax-loss proposition 3""" z = random.uniform(low=-3, high=3, size=(test_obs, 10)) @@ -198,6 +234,10 @@ class SparsemaxLossTest(test.TestCase): self._test_sparsemax_loss_against_numpy(dtype, random, use_gpu=False) + self._test_sparsemax_loss_of_nan(dtype, random, use_gpu=False) + + self._test_sparsemax_loss_of_inf(dtype, random, use_gpu=False) + self._test_constant_add(dtype, random, use_gpu=False) self._test_sparsemax_loss_positive(dtype, random, use_gpu=False) diff --git a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py index 259e62bd864fba3cc7d9aa387e02c8319438d658..c95b9da1e4c2c144e2b79a88028874c27a7f8b96 100644 --- a/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py +++ b/tensorflow/contrib/sparsemax/python/kernel_tests/sparsemax_test.py @@ -87,6 +87,46 @@ class SparsemaxTest(test.TestCase): p_sparemax, tf_sparsemax_out, half_atol=5e-3) self.assertShapeEqual(p_sparemax, tf_sparsemax_op) + def _test_sparsemax_of_nan(self, dtype, random, use_gpu): + """check sparsemax transfers nan""" + z_nan = np.asarray([ + [0, np.nan, 0], + [0, np.nan, np.nan], + [np.nan, np.nan, np.nan], + ]).astype(dtype) + + _, tf_sparsemax_nan = self._tf_sparsemax(z_nan, dtype, use_gpu) + self.assertAllCloseAccordingToType( + [[np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan]], tf_sparsemax_nan) + + def _test_sparsemax_of_inf(self, dtype, random, use_gpu): + """check sparsemax is infinity safe""" + z_neg = np.asarray([ + [0, -np.inf, 0], + [0, -np.inf, -np.inf], + [-np.inf, -np.inf, -np.inf], + ]).astype(dtype) + z_pos = np.asarray([[0, np.inf, 0], [0, 
np.inf, np.inf], + [np.inf, np.inf, np.inf]]).astype(dtype) + z_mix = np.asarray([[0, np.inf, 0], [0, np.inf, -np.inf], + [-np.inf, np.inf, -np.inf]]).astype(dtype) + + _, tf_sparsemax_neg = self._tf_sparsemax(z_neg, dtype, use_gpu) + self.assertAllCloseAccordingToType( + [[0.5, 0, 0.5], [1, 0, 0], [np.nan, np.nan, np.nan]], tf_sparsemax_neg) + + _, tf_sparsemax_pos = self._tf_sparsemax(z_pos, dtype, use_gpu) + self.assertAllCloseAccordingToType( + [[np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan]], tf_sparsemax_pos) + + _, tf_sparsemax_mix = self._tf_sparsemax(z_mix, dtype, use_gpu) + self.assertAllCloseAccordingToType( + [[np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan]], tf_sparsemax_mix) + + def _test_sparsemax_of_zero(self, dtype, random, use_gpu): """check sparsemax proposition 1, part 1""" z = np.zeros((1, 10)) @@ -97,7 +137,7 @@ class SparsemaxTest(test.TestCase): self.assertAllCloseAccordingToType(p_sparemax, tf_sparsemax_out) self.assertShapeEqual(p_sparemax, tf_sparsemax_op) - def _test_sparsemax_of_inf(self, dtype, random, use_gpu): + def _test_sparsemax_of_to_inf(self, dtype, random, use_gpu): """check sparsemax proposition 1, part 2""" z = random.uniform(low=-3, high=3, size=(test_obs, 10)) @@ -210,10 +250,14 @@ class SparsemaxTest(test.TestCase): self._test_sparsemax_against_numpy(dtype, random, use_gpu=False) - self._test_sparsemax_of_zero(dtype, random, use_gpu=False) + self._test_sparsemax_of_nan(dtype, random, use_gpu=False) self._test_sparsemax_of_inf(dtype, random, use_gpu=False) + self._test_sparsemax_of_zero(dtype, random, use_gpu=False) + + self._test_sparsemax_of_to_inf(dtype, random, use_gpu=False) + self._test_constant_add(dtype, random, use_gpu=False) self._test_permutation(dtype, random, use_gpu=False) diff --git a/tensorflow/contrib/sparsemax/python/ops/sparsemax.py b/tensorflow/contrib/sparsemax/python/ops/sparsemax.py index 
e617af2ff1b731eddb5b72469a1cd67e7cfd163f..f79c93f34750e96ec6bd7cdf4d0dad9981d6a2b3 100644 --- a/tensorflow/contrib/sparsemax/python/ops/sparsemax.py +++ b/tensorflow/contrib/sparsemax/python/ops/sparsemax.py @@ -49,7 +49,14 @@ def sparsemax(logits, name=None): obs = array_ops.shape(logits)[0] dims = array_ops.shape(logits)[1] - z = logits - math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis] + # In the paper, they call the logits z. + # The mean(logits) can be subtracted from logits to make the algorithm + # more numerically stable. The instability in this algorithm comes mostly + # from the z_cumsum. Subtracting the mean will cause z_cumsum to be close + # to zero. However, in practice the numerical instability issues are very + # minor and subtracting the mean causes extra issues with inf and nan + # input. + z = logits # sort z z_sorted, _ = nn.top_k(z, k=dims) @@ -64,10 +71,24 @@ def sparsemax(logits, name=None): k_z = math_ops.reduce_sum(math_ops.cast(z_check, dtypes.int32), axis=1) # calculate tau(z) - indices = array_ops.stack([math_ops.range(0, obs), k_z - 1], axis=1) + # If there are inf values or all values are -inf, the k_z will be zero, + # this is mathematically invalid and will also cause the gather_nd to fail. + # Prevent this issue for now by setting k_z = 1 if k_z = 0, this is then + # fixed later (see p_safe) by returning p = nan. This results in the same + # behavior as softmax.
+ k_z_safe = math_ops.maximum(k_z, 1) + indices = array_ops.stack([math_ops.range(0, obs), k_z_safe - 1], axis=1) tau_sum = array_ops.gather_nd(z_cumsum, indices) tau_z = (tau_sum - 1) / math_ops.cast(k_z, logits.dtype) # calculate p - return math_ops.maximum( + p = math_ops.maximum( math_ops.cast(0, logits.dtype), z - tau_z[:, array_ops.newaxis]) + # If k_z = 0 or if z = nan, then the input is invalid + p_safe = array_ops.where( + math_ops.logical_or( + math_ops.equal(k_z, 0), math_ops.is_nan(z_cumsum[:, -1])), + array_ops.fill([obs, dims], math_ops.cast(float("nan"), logits.dtype)), + p) + + return p_safe diff --git a/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py b/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py index 582d1e6136df4d3ad3c8108ae9607d5fef519145..c0438f16bc857cbda248eb2791883ae39e1c5eb1 100644 --- a/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py +++ b/tensorflow/contrib/sparsemax/python/ops/sparsemax_loss.py @@ -47,14 +47,30 @@ def sparsemax_loss(logits, sparsemax, labels, name=None): sparsemax = ops.convert_to_tensor(sparsemax, name="sparsemax") labels = ops.convert_to_tensor(labels, name="labels") - shifted_logits = logits - \ - math_ops.reduce_mean(logits, axis=1)[:, array_ops.newaxis] + # In the paper, they call the logits z. + # A constant can be subtracted from logits to make the algorithm + # more numerically stable in theory. However, there are really no major + # sources of numerical instability in this algorithm. + z = logits # sum over support - support = math_ops.cast(sparsemax > 0, sparsemax.dtype) - sum_s = support * sparsemax * (shifted_logits - 0.5 * sparsemax) + # Use a conditional where instead of a multiplication to support z = -inf. + # If z = -inf, and there is no support (sparsemax = 0), a multiplication + # would cause 0 * -inf = nan, which is not correct in this case.
+ sum_s = array_ops.where( + math_ops.logical_or(sparsemax > 0, math_ops.is_nan(sparsemax)), + sparsemax * (z - 0.5 * sparsemax), array_ops.zeros_like(sparsemax)) # - z_k + ||q||^2 - q_part = labels * (0.5 * labels - shifted_logits) + q_part = labels * (0.5 * labels - z) + # Fix the case where labels = 0 and z = -inf, where q_part would + # otherwise be 0 * -inf = nan. But since the labels = 0, no cost for + # z = -inf should be considered. + # The code below also covers the case where z = inf. However, in this + # case the sparsemax will be nan, which means the sum_s will also be nan, + # therefore this case doesn't need additional special treatment. + q_part_safe = array_ops.where( + math_ops.logical_and(math_ops.equal(labels, 0), math_ops.is_inf(z)), + array_ops.zeros_like(z), q_part) - return math_ops.reduce_sum(sum_s + q_part, axis=1) + return math_ops.reduce_sum(sum_s + q_part_safe, axis=1) diff --git a/tensorflow/contrib/stateless/BUILD b/tensorflow/contrib/stateless/BUILD index a217397c1a219508c74382bf0df4bf9aa0a15aff..e9ddec888994f37b5c5dc5afd327689aa9957f13 100644 --- a/tensorflow/contrib/stateless/BUILD +++ b/tensorflow/contrib/stateless/BUILD @@ -11,7 +11,10 @@ load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") py_library( name = "stateless", - srcs = ["__init__.py"], + srcs = [ + "__init__.py", + "python/stateless_ops.py", + ], srcs_version = "PY2AND3", deps = [ "//tensorflow/python:framework_ops", diff --git a/tensorflow/contrib/stateless/__init__.py b/tensorflow/contrib/stateless/__init__.py index fe23fe0dd86d9e8267a1dba945e6dd1daec3f4af..30d0a7ab6ae1ebca44df9cba788674fcad69d538 100644 --- a/tensorflow/contrib/stateless/__init__.py +++ b/tensorflow/contrib/stateless/__init__.py @@ -32,16 +32,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.framework import ops - # pylint: disable=wildcard-import -from tensorflow.python.ops.gen_stateless_random_ops
import * +from tensorflow.contrib.stateless.python.stateless_ops import * from tensorflow.python.util.all_util import remove_undocumented -ops.NotDifferentiable("StatelessMultinomial") -ops.NotDifferentiable("StatelessRandomNormal") -ops.NotDifferentiable("StatelessRandomUniform") -ops.NotDifferentiable("StatelessTruncatedNormal") - remove_undocumented(__name__) diff --git a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py index d724a5c014d2f9f5f6e3a6704341bcb8c429ae06..ec5a13b7c624918ea8024e8ced1954b02125b9a0 100644 --- a/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py +++ b/tensorflow/contrib/stateless/python/kernel_tests/stateless_random_ops_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import functools + import numpy as np from tensorflow.contrib import stateless from tensorflow.python.framework import constant_op @@ -27,10 +29,6 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import random_ops from tensorflow.python.platform import test -CASES = [(stateless.stateless_random_uniform, random_ops.random_uniform), - (stateless.stateless_random_normal, random_ops.random_normal), - (stateless.stateless_truncated_normal, random_ops.truncated_normal)] - def invert_philox(key, value): """Invert the Philox bijection.""" @@ -51,90 +49,30 @@ def invert_philox(key, value): class StatelessOpsTest(test.TestCase): - def testMatchStateful(self): + def _test_match(self, cases): # Stateless ops should be the same as stateful ops on the first call # after seed scrambling. 
+ cases = tuple(cases) key = 0x3ec8f720, 0x02461e29 for seed in (7, 17), (11, 5), (2, 3): preseed = invert_philox(key, (seed[0], 0, seed[1], 0)).astype(np.uint64) preseed = preseed[::2] | preseed[1::2] << 32 random_seed.set_random_seed(seed[0]) with self.test_session(use_gpu=True): - for stateless_op, stateful_op in CASES: - for shape in (), (3,), (2, 5): - stateful = stateful_op(shape, seed=seed[1]) - pure = stateless_op(shape, seed=preseed) - self.assertAllEqual(stateful.eval(), pure.eval()) + for stateless_op, stateful_op in cases: + stateful = stateful_op(seed=seed[1]) + pure = stateless_op(seed=preseed) + self.assertAllEqual(stateful.eval(), pure.eval()) - def testDeterminism(self): + def _test_determinism(self, cases): # Stateless values should be equal iff the seeds are equal (roughly) + cases = tuple(cases) with self.test_session(use_gpu=True): for seed_type in [dtypes.int32, dtypes.int64]: seed_t = array_ops.placeholder(seed_type, shape=[2]) seeds = [(x, y) for x in range(5) for y in range(5)] * 3 - for stateless_op, _ in CASES: - for shape in (), (3,), (2, 5): - pure = stateless_op(shape, seed=seed_t) - values = [(seed, pure.eval(feed_dict={seed_t: seed})) - for seed in seeds] - for s0, v0 in values: - for s1, v1 in values: - self.assertEqual(s0 == s1, np.all(v0 == v1)) - - def testShapeType(self): - with self.test_session(use_gpu=True): - for shape_dtype in [dtypes.int32, dtypes.int64]: - seed_t = array_ops.placeholder(dtypes.int64, shape=[2]) - seeds = [(x, y) for x in range(5) for y in range(5)] * 3 - for stateless_op, _ in CASES: - for shape in (), (3,), (2, 5): - pure = stateless_op(constant_op.constant(shape, dtype=shape_dtype), - seed=seed_t) - values = [(seed, pure.eval(feed_dict={seed_t: seed})) - for seed in seeds] - for s0, v0 in values: - for s1, v1 in values: - self.assertEqual(s0 == s1, np.all(v0 == v1)) - - def testMatchStatefulMultinomial(self): - # Stateless ops should be the same as stateful ops on the first call - # after seed 
scrambling. - key = 0x3ec8f720, 0x02461e29 - num_samples = 4 - for logits_dtype in np.float16, np.float32, np.float64: - for output_dtype in dtypes.int32, dtypes.int64: - for seed in (7, 17), (11, 5), (2, 3): - preseed = invert_philox(key, - (seed[0], 0, seed[1], 0)).astype(np.uint64) - preseed = preseed[::2] | preseed[1::2] << 32 - random_seed.set_random_seed(seed[0]) - with self.test_session(use_gpu=True): - for logits in ([[0.1, 0.25, 0.5, 0.15]], [[0.5, 0.5], [0.8, 0.2], - [0.25, 0.75]]): - logits_t = constant_op.constant(logits, dtype=logits_dtype) - stateful = random_ops.multinomial( - logits_t, - num_samples, - seed=seed[1], - output_dtype=output_dtype) - pure = stateless.stateless_multinomial( - logits_t, - num_samples, - seed=preseed, - output_dtype=output_dtype) - self.assertAllEqual(stateful.eval(), pure.eval()) - - def testDeterminismMultinomial(self): - # Stateless values should be equal iff the seeds are equal (roughly) - num_samples = 10 - with self.test_session(use_gpu=True): - for seed_type in [dtypes.int32, dtypes.int64]: - seed_t = array_ops.placeholder(seed_type, shape=[2]) - seeds = [(x, y) for x in range(5) for y in range(5)] * 3 - for logits in ([[0.1, 0.25, 0.5, 0.15]], [[0.5, 0.5], [0.8, 0.2], - [0.25, 0.75]]): - pure = stateless.stateless_multinomial( - logits, num_samples, seed=seed_t) + for stateless_op, _ in cases: + pure = stateless_op(seed=seed_t) values = [ (seed, pure.eval(feed_dict={seed_t: seed})) for seed in seeds ] @@ -142,6 +80,74 @@ class StatelessOpsTest(test.TestCase): for s1, v1 in values: self.assertEqual(s0 == s1, np.all(v0 == v1)) + def _float_cases(self, shape_dtypes=(None,)): + float_cases = ( + # Uniform distribution, with and without range + (stateless.stateless_random_uniform, random_ops.random_uniform, {}), + (stateless.stateless_random_uniform, random_ops.random_uniform, + dict(minval=2.2, maxval=7.1)), + # Normal distribution, with and without mean+stddev + (stateless.stateless_random_normal, 
random_ops.random_normal, {}), + (stateless.stateless_random_normal, random_ops.random_normal, + dict(mean=2, stddev=3)), + # Truncated normal distribution, with and without mean+stddev + (stateless.stateless_truncated_normal, random_ops.truncated_normal, {}), + (stateless.stateless_truncated_normal, random_ops.truncated_normal, + dict(mean=3, stddev=4)), + ) + for dtype in dtypes.float16, dtypes.float32, dtypes.float64: + for shape_dtype in shape_dtypes: + for shape in (), (3,), (2, 5): + if shape_dtype is not None: + shape = constant_op.constant(shape, dtype=shape_dtype) + for stateless_op, stateful_op, kwds in float_cases: + kwds = dict(shape=shape, dtype=dtype, **kwds) + yield (functools.partial(stateless_op, **kwds), + functools.partial(stateful_op, **kwds)) + + def _int_cases(self, shape_dtypes=(None,)): + for shape_dtype in shape_dtypes: + for shape in (), (3,), (2, 5): + if shape_dtype is not None: + shape = constant_op.constant(shape, dtype=shape_dtype) + for dtype in dtypes.int32, dtypes.int64: + kwds = dict(minval=2, maxval=11111, dtype=dtype, shape=shape) + yield (functools.partial(stateless.stateless_random_uniform, **kwds), + functools.partial(random_ops.random_uniform, **kwds)) + + def _multinomial_cases(self): + num_samples = 10 + for logits_dtype in np.float16, np.float32, np.float64: + for output_dtype in dtypes.int32, dtypes.int64: + for logits in ([[0.1, 0.25, 0.5, 0.15]], [[0.5, 0.5], [0.8, 0.2], + [0.25, 0.75]]): + kwds = dict( + logits=constant_op.constant(logits, dtype=logits_dtype), + num_samples=num_samples, + output_dtype=output_dtype) + yield (functools.partial(stateless.stateless_multinomial, **kwds), + functools.partial(random_ops.multinomial, **kwds)) + + def testMatchFloat(self): + self._test_match(self._float_cases()) + + def testMatchInt(self): + self._test_match(self._int_cases()) + + def testMatchMultinomial(self): + self._test_match(self._multinomial_cases()) + + def testDeterminismFloat(self): + self._test_determinism( + 
self._float_cases(shape_dtypes=(dtypes.int32, dtypes.int64))) + + def testDeterminismInt(self): + self._test_determinism( + self._int_cases(shape_dtypes=(dtypes.int32, dtypes.int64))) + + def testDeterminismMultinomial(self): + self._test_determinism(self._multinomial_cases()) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/stateless/python/stateless_ops.py b/tensorflow/contrib/stateless/python/stateless_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..1449825c83c38b648287c8d38279d5399b63e3ef --- /dev/null +++ b/tensorflow/contrib/stateless/python/stateless_ops.py @@ -0,0 +1,214 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Stateless random ops which take seed as a tensor input.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.ops import gen_stateless_random_ops + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import random_ops +from tensorflow.python.ops import math_ops + +ops.NotDifferentiable("StatelessMultinomial") +ops.NotDifferentiable("StatelessRandomNormal") +ops.NotDifferentiable("StatelessRandomUniform") +ops.NotDifferentiable("StatelessRandomUniformInt") +ops.NotDifferentiable("StatelessTruncatedNormal") + + +def stateless_random_uniform(shape, + seed, + minval=0, + maxval=None, + dtype=dtypes.float32, + name=None): + """Outputs deterministic pseudorandom values from a uniform distribution. + + This is a stateless version of `tf.random_uniform`: if run twice with the + same seeds, it will produce the same pseudorandom numbers. The output is + consistent across multiple runs on the same hardware (and between CPU + and GPU), but may change between versions of TensorFlow or on non-CPU/GPU + hardware. + + The generated values follow a uniform distribution in the range + `[minval, maxval)`. The lower bound `minval` is included in the range, while + the upper bound `maxval` is excluded. + + For floats, the default range is `[0, 1)`. For ints, at least `maxval` must + be specified explicitly. + + In the integer case, the random integers are slightly biased unless + `maxval - minval` is an exact power of two. The bias is small for values of + `maxval - minval` significantly smaller than the range of the output (either + `2**32` or `2**64`). + + Args: + shape: A 1-D integer Tensor or Python array. The shape of the output tensor. + seed: A shape [2] integer Tensor of seeds to the random number generator. 
+ minval: A 0-D Tensor or Python value of type `dtype`. The lower bound on the + range of random values to generate. Defaults to 0. + maxval: A 0-D Tensor or Python value of type `dtype`. The upper bound on the + range of random values to generate. Defaults to 1 if `dtype` is floating + point. + dtype: The type of the output: `float16`, `bfloat16`, `float32`, `float64`, + `int32`, or `int64`. + name: A name for the operation (optional). + + Returns: + A tensor of the specified shape filled with random uniform values. + + Raises: + ValueError: If `dtype` is integral and `maxval` is not specified. + """ + dtype = dtypes.as_dtype(dtype) + if dtype not in (dtypes.float16, dtypes.bfloat16, dtypes.float32, + dtypes.float64, dtypes.int32, dtypes.int64): + raise ValueError("Invalid dtype %r" % dtype) + if maxval is None: + if dtype.is_integer: + raise ValueError("Must specify maxval for integer dtype %r" % dtype) + maxval = 1 + with ops.name_scope(name, "stateless_random_uniform", + [shape, seed, minval, maxval]) as name: + shape = random_ops._ShapeTensor(shape) # pylint: disable=protected-access + minval = ops.convert_to_tensor(minval, dtype=dtype, name="min") + maxval = ops.convert_to_tensor(maxval, dtype=dtype, name="max") + if dtype.is_integer: + return gen_stateless_random_ops.stateless_random_uniform_int( + shape, seed=seed, minval=minval, maxval=maxval, name=name) + else: + rnd = gen_stateless_random_ops.stateless_random_uniform( + shape, seed=seed, dtype=dtype) + return math_ops.add(rnd * (maxval - minval), minval, name=name) + + +def stateless_random_normal(shape, + seed, + mean=0.0, + stddev=1.0, + dtype=dtypes.float32, + name=None): + """Outputs deterministic pseudorandom values from a normal distribution. + + This is a stateless version of `tf.random_normal`: if run twice with the + same seeds, it will produce the same pseudorandom numbers.
The output is + consistent across multiple runs on the same hardware (and between CPU + and GPU), but may change between versions of TensorFlow or on non-CPU/GPU + hardware. + + Args: + shape: A 1-D integer Tensor or Python array. The shape of the output tensor. + seed: A shape [2] integer Tensor of seeds to the random number generator. + mean: A 0-D Tensor or Python value of type `dtype`. The mean of the normal + distribution. + stddev: A 0-D Tensor or Python value of type `dtype`. The standard deviation + of the normal distribution. + dtype: The type of the output. + name: A name for the operation (optional). + + Returns: + A tensor of the specified shape filled with random normal values. + """ + with ops.name_scope(name, "stateless_random_normal", + [shape, seed, mean, stddev]) as name: + shape = random_ops._ShapeTensor(shape) # pylint: disable=protected-access + mean = ops.convert_to_tensor(mean, dtype=dtype, name="mean") + stddev = ops.convert_to_tensor(stddev, dtype=dtype, name="stddev") + rnd = gen_stateless_random_ops.stateless_random_normal(shape, seed, dtype) + return math_ops.add(rnd * stddev, mean, name=name) + + +def stateless_truncated_normal(shape, + seed, + mean=0.0, + stddev=1.0, + dtype=dtypes.float32, + name=None): + """Outputs deterministic pseudorandom values, truncated normally distributed. + + This is a stateless version of `tf.truncated_normal`: if run twice with the + same seeds, it will produce the same pseudorandom numbers. The output is + consistent across multiple runs on the same hardware (and between CPU + and GPU), but may change between versions of TensorFlow or on non-CPU/GPU + hardware. + + The generated values follow a normal distribution with specified mean and + standard deviation, except that values whose magnitude is more than 2 standard + deviations from the mean are dropped and re-picked. + + Args: + shape: A 1-D integer Tensor or Python array. The shape of the output tensor. 
+ seed: A shape [2] integer Tensor of seeds to the random number generator. + mean: A 0-D Tensor or Python value of type `dtype`. The mean of the + truncated normal distribution. + stddev: A 0-D Tensor or Python value of type `dtype`. The standard deviation + of the normal distribution, before truncation. + dtype: The type of the output. + name: A name for the operation (optional). + + Returns: + A tensor of the specified shape filled with random truncated normal values. + """ + with ops.name_scope(name, "stateless_truncated_normal", + [shape, seed, mean, stddev]) as name: + shape = random_ops._ShapeTensor(shape) # pylint: disable=protected-access + mean = ops.convert_to_tensor(mean, dtype=dtype, name="mean") + stddev = ops.convert_to_tensor(stddev, dtype=dtype, name="stddev") + rnd = gen_stateless_random_ops.stateless_truncated_normal( + shape, seed, dtype) + return math_ops.add(rnd * stddev, mean, name=name) + + +def stateless_multinomial(logits, + num_samples, + seed, + output_dtype=dtypes.int64, + name=None): + """Draws deterministic pseudorandom samples from a multinomial distribution. + + This is a stateless version of `tf.multinomial`: if run twice with the + same seeds, it will produce the same pseudorandom numbers. The output is + consistent across multiple runs on the same hardware (and between CPU + and GPU), but may change between versions of TensorFlow or on non-CPU/GPU + hardware. + + Example: + + ```python + # samples has shape [1, 5], where each value is either 0 or 1 with equal + # probability. + samples = tf.contrib.stateless.stateless_multinomial( + tf.log([[10., 10.]]), 5, seed=[7, 17]) + ``` + + Args: + logits: 2-D Tensor with shape `[batch_size, num_classes]`. Each slice + `[i, :]` represents the unnormalized log-probabilities for all classes. + num_samples: 0-D. Number of independent samples to draw for each row slice. + seed: A shape [2] integer Tensor of seeds to the random number generator. + name: Optional name for the operation. 
+ output_dtype: integer type to use for the output. Defaults to int64. + + Returns: + The drawn samples of shape `[batch_size, num_samples]`. + """ + with ops.name_scope(name, "stateless_multinomial", [logits, seed]): + logits = ops.convert_to_tensor(logits, name="logits") + return gen_stateless_random_ops.stateless_multinomial( + logits, num_samples, seed, output_dtype=output_dtype) diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py index 03da2b82e5f8810d2a83a3d2bc03d8d285703861..9c585fe6a7537d105dba57818b5b33f559bfa6bc 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/math_utils.py +++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils.py @@ -543,20 +543,25 @@ class TupleOfTensorsLookup(lookup.LookupInterface): overhead. """ - def __init__( - self, key_dtype, default_values, empty_key, name, checkpoint=True): + def __init__(self, + key_dtype, + default_values, + empty_key, + deleted_key, + name, + checkpoint=True): default_values_flat = nest.flatten(default_values) - self._hash_tables = nest.pack_sequence_as( - default_values, - [TensorValuedMutableDenseHashTable( + self._hash_tables = nest.pack_sequence_as(default_values, [ + TensorValuedMutableDenseHashTable( key_dtype=key_dtype, value_dtype=default_value.dtype.base_dtype, default_value=default_value, empty_key=empty_key, + deleted_key=deleted_key, name=name + "_{}".format(table_number), checkpoint=checkpoint) - for table_number, default_value - in enumerate(default_values_flat)]) + for table_number, default_value in enumerate(default_values_flat) + ]) self._name = name def lookup(self, keys): diff --git a/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py b/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py index c0de42b15b3367aea4c076f4058641ae99b9219e..91265b9b2e6dad09dcca19cbd9f2f25763f3dd43 100644 --- 
a/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py +++ b/tensorflow/contrib/timeseries/python/timeseries/math_utils_test.py @@ -223,10 +223,12 @@ class TestLookupTable(test.TestCase): hash_table = math_utils.TupleOfTensorsLookup( key_dtype=dtypes.int64, default_values=[[ - array_ops.ones([3, 2], dtype=dtypes.float32), array_ops.zeros( - [5], dtype=dtypes.float64) - ], array_ops.ones([7, 7], dtype=dtypes.int64)], + array_ops.ones([3, 2], dtype=dtypes.float32), + array_ops.zeros([5], dtype=dtypes.float64) + ], + array_ops.ones([7, 7], dtype=dtypes.int64)], empty_key=-1, + deleted_key=-2, name="test_lookup") def stack_tensor(base_tensor): return array_ops.stack([base_tensor + 1, base_tensor + 2]) diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_management.py b/tensorflow/contrib/timeseries/python/timeseries/state_management.py index 13eecd4d822faaeb9553c7723c6842cdcb38aa3f..138406c6168f48339f6b96102d26e868f36083d3 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_management.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_management.py @@ -149,6 +149,7 @@ class ChainingStateManager(_OverridableStateManager): key_dtype=dtypes.int64, default_values=self._start_state, empty_key=-1, + deleted_key=-2, name="cached_states", checkpoint=self._checkpoint_state) diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index 10ed1c289109610b25b998eee332a3ddbf940dc3..401afcb0f440629082da0ab967ae799373e55cd2 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -86,6 +86,7 @@ py_library( "//tensorflow/python:variable_scope", "//tensorflow/python:variables", "//tensorflow/python/estimator:estimator_py", + "//tensorflow/python/estimator:util", "@six_archive//:six", ], ) @@ -191,6 +192,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":keras_support", # split out to avoid cycle with tpu_strategy + ":tpu_embedding", ":tpu_estimator", ":tpu_lib", ], @@ -302,6 +304,7 @@ 
tf_py_test( "//tensorflow/python:client_testlib", ":datasets", ], + flaky = 1, # TODO(b/117363808): fails 1/1000 OSS runs grpc_enabled = True, ) @@ -394,3 +397,21 @@ tf_py_test( "//tensorflow/python:framework_test_lib", ], ) + +py_library( + name = "tpu_embedding", + srcs = ["python/tpu/tpu_embedding.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/tpu:tpu_ops", + "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:init_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:partitioned_variables", + "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + "@six_archive//:six", + ], +) diff --git a/tensorflow/contrib/tpu/proto/optimization_parameters.proto b/tensorflow/contrib/tpu/proto/optimization_parameters.proto index 8529b48c15fa749a7c2978003f47aafca17da82f..c2e3be03db0e4cca1a664f9e79aa9107384de312 100644 --- a/tensorflow/contrib/tpu/proto/optimization_parameters.proto +++ b/tensorflow/contrib/tpu/proto/optimization_parameters.proto @@ -62,9 +62,9 @@ message FtrlParameters { // (https://www.tensorflow.org/api_docs/python/tf/contrib/opt/AdamOptimizer). If // use_non_lazy_adam is enabled, use_gradient_accumulation is also required in // order to get correct results; a warning will be printed otherwise (which may -// change to an error in the future). If use_max_with_epsilon is set, the Adam +// change to an error in the future). If use_sum_inside_sqrt is set, the Adam // variable update formula will be changed from m / (sqrt(v) + epsilon) to -// m / max(sqrt(v), abs(epsilon)); this option improves the performance of TPU +// m / sqrt(v + epsilon**2); this option improves the performance of TPU // training and is not expected to harm model quality. 
message AdamParameters { float beta1 = 3; @@ -73,7 +73,7 @@ message AdamParameters { float initial_m = 6; float initial_v = 7; bool use_non_lazy_adam = 8; - bool use_max_with_epsilon = 9; + bool use_sum_inside_sqrt = 10; } // https://www.tensorflow.org/api_docs/python/tf/train/MomentumOptimizer diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py index af183b32325287eb9180b8ec9541bf712722174e..d628258b9dcfbee5a330d71e5838ee7bf7732fbd 100644 --- a/tensorflow/contrib/tpu/python/tpu/keras_support.py +++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py @@ -286,7 +286,8 @@ def _cross_replica_concat(tensor, core_id, num_cores, name): '{}.'.format(input_dtype, name)) batch_size = tensor.shape[0] - mask = math_ops.to_float(math_ops.equal(range(num_cores), core_id)) + mask = math_ops.to_float( + math_ops.equal(np.arange(num_cores, dtype=np.int32), core_id)) mask = array_ops.reshape(mask, [num_cores] + [1] * tensor.shape.ndims) result = mask * math_ops.to_float(tensor) local_tensor_with_holes = array_ops.reshape(result, diff --git a/tensorflow/contrib/tpu/python/tpu/session_support.py b/tensorflow/contrib/tpu/python/tpu/session_support.py index 05264f5a46fcd404a3185306c00ed6a4cdf06f77..82482563739325a634eb36c502886309b5e0bb3c 100644 --- a/tensorflow/contrib/tpu/python/tpu/session_support.py +++ b/tensorflow/contrib/tpu/python/tpu/session_support.py @@ -35,6 +35,8 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import session_run_hook from tensorflow.python.training import training_util +_WATCHDOG = None + class CoordinatorShutdownException(Exception): """Raised when the coordinator needs to shutdown.""" @@ -256,6 +258,22 @@ class WatchdogManager(threading.Thread): time.sleep(self.ping_interval) +def start_worker_watchdog(session, + devices=None, + ping_interval=60, + shutdown_timeout=3600): + """Start global worker watchdog to shutdown workers on coordinator 
exit.""" + global _WATCHDOG + if _WATCHDOG is None: + # Ensure we can send a few pings before we timeout! + ping_interval = min(shutdown_timeout / 10., ping_interval) + logging.info('Enabling watchdog timer with %d second timeout', + shutdown_timeout) + _WATCHDOG = WatchdogManager(session, devices, ping_interval, + shutdown_timeout) + _WATCHDOG.configure_and_run() + + class GracefulShutdownHook(session_run_hook.SessionRunHook): """Session hook that watches for shutdown events. diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py b/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py new file mode 100644 index 0000000000000000000000000000000000000000..741d2b759fd149677c170b33b6a537fcacd80d64 --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/tpu_embedding.py @@ -0,0 +1,1099 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""TPU embedding APIs.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import copy +import math +import re +import six + +from tensorflow.contrib.framework.python.framework import experimental +from tensorflow.contrib.tpu.ops import gen_tpu_ops +from tensorflow.contrib.tpu.proto import tpu_embedding_configuration_pb2 as elc +from tensorflow.contrib.tpu.python.ops import tpu_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables + +TRAINING = elc.TPUEmbeddingConfiguration.TRAINING +INFERENCE = elc.TPUEmbeddingConfiguration.INFERENCE + +# TODO(shizhiw): A better interface is to make `num_hosts` and +# `num_cores_per_host` optional parameters for `TPUEmbedding` +# constructor. Usually they can be automatically detected, but +# user can also specify them for debugging (b/112112496). +# Auto-detection can be done with `tpu_system_metadata.py`. 
+_MASTER_JOB = 'tpu_worker' +_HOST_PATTERN = '/job:tpu_worker/task:{}/device:CPU:0' +_NUM_CORES_PER_HOST = 8 + +_TEST_MASTER_JOB = None +_TEST_HOST = '/replica:0/task:0/device:CPU:0' +_TEST_NUM_CORES_PER_HOST = 2 + + +class TableConfig( + collections.namedtuple( + 'TableConfig', + ['vocabulary_size', 'dimension', 'initializer', 'combiner'])): + """Embedding table configuration.""" + + @experimental + def __new__(cls, + vocabulary_size, + dimension, + initializer=None, + combiner='mean'): + """Embedding table configuration. + + Args: + vocabulary_size: Number of vocabulary (/rows) in the table. + dimension: The embedding dimension. + initializer: A variable initializer function to be used in embedding + variable initialization. If not specified, defaults to + `tf.truncated_normal_initializer` with mean `0.0` and standard deviation + `1/sqrt(dimension)`. + combiner: A string specifying how to reduce if there are multiple entries + in a single row. Currently 'mean', 'sqrtn' and 'sum' are supported, with + 'mean' the default. 'sqrtn' often achieves good accuracy, in particular + with bag-of-words columns. For more information, see + `tf.nn.embedding_lookup_sparse`. + + Returns: + `TableConfig`. + + Raises: + ValueError: if `vocabulary_size` is not positive integer. + ValueError: if `dimension` is not positive integer. + ValueError: if `initializer` is specified and is not callable. + ValueError: if `combiner` is not supported. 
+ """ + if not isinstance(vocabulary_size, int) or vocabulary_size < 1: + raise ValueError('Invalid vocabulary_size {}.'.format(vocabulary_size)) + + if not isinstance(dimension, int) or dimension < 1: + raise ValueError('Invalid dimension {}.'.format(dimension)) + + if (initializer is not None) and (not callable(initializer)): + raise ValueError('initializer must be callable if specified.') + if initializer is None: + initializer = init_ops.truncated_normal_initializer( + mean=0.0, stddev=1 / math.sqrt(dimension)) + + if combiner not in ('mean', 'sum', 'sqrtn'): + raise ValueError('Invalid combiner {}'.format(combiner)) + + return super(TableConfig, cls).__new__(cls, vocabulary_size, dimension, + initializer, combiner) + + +# TODO(shizhiw): Factor `use_gradient_accumulation` and +# `pipeline_execution_with_tensor_core` out of `_OptimizationParameters`. +class _OptimizationParameters(object): + """Parameters common to all optimizations.""" + + def __init__(self, learning_rate, use_gradient_accumulation, + pipeline_execution_with_tensor_core): + self.learning_rate = learning_rate + self.use_gradient_accumulation = use_gradient_accumulation + self.pipeline_execution_with_tensor_core = ( + pipeline_execution_with_tensor_core) + + +class AdagradParameters(_OptimizationParameters): + """Optimization parameters for Adagrad.""" + + def __init__(self, learning_rate, initial_accumulator, + use_gradient_accumulation=False, + pipeline_execution_with_tensor_core=True): + """Optimization parameters for Adagrad. + + Args: + learning_rate: used for updating embedding table. + initial_accumulator: initial accumulator for Adagrad. + use_gradient_accumulation: setting this to `True` makes embedding + gradients calculation more accurate but slower. Please see + `optimization_parameters.proto` for details. + for details. 
+ pipeline_execution_with_tensor_core: setting this to `True` makes training + faster, but trained model will be different if step N and step N+1 + involve the same set of embedding ID. Please see + `tpu_embedding_configuration.proto` for details. + """ + super(AdagradParameters, self).__init__(learning_rate, + use_gradient_accumulation, + pipeline_execution_with_tensor_core) + self.initial_accumulator = initial_accumulator + + +class AdamParameters(_OptimizationParameters): + """Optimization parameters for Adam.""" + + def __init__(self, learning_rate, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + use_gradient_accumulation=False, + pipeline_execution_with_tensor_core=True): + """Optimization parameters for Adam. + + Args: + learning_rate: a floating point value. The learning rate. + beta1: A float value. + The exponential decay rate for the 1st moment estimates. + beta2: A float value. + The exponential decay rate for the 2nd moment estimates. + epsilon: A small constant for numerical stability. + use_gradient_accumulation: setting this to `True` makes embedding + gradients calculation more accurate but slower. Please see + `optimization_parameters.proto` for details. + for details. + pipeline_execution_with_tensor_core: setting this to `True` makes training + faster, but trained model will be different if step N and step N+1 + involve the same set of embedding ID. Please see + `tpu_embedding_configuration.proto` for details. + """ + super(AdamParameters, self).__init__(learning_rate, + use_gradient_accumulation, + pipeline_execution_with_tensor_core) + self.beta1 = beta1 + self.beta2 = beta2 + self.epsilon = epsilon + + +class StochasticGradientDescentParameters(_OptimizationParameters): + """Optimization parameters for stochastic gradient descent. + + Args: + learning_rate: a floating point value. The learning rate. + use_gradient_accumulation: setting this to `True` makes embedding + gradients calculation more accurate but slower. 
Please see + `optimization_parameters.proto` for details. + pipeline_execution_with_tensor_core: setting this to `True` makes training + faster, but trained model will be different if step N and step N+1 + involve the same set of embedding ID. Please see + `tpu_embedding_configuration.proto` for details. + """ + + def __init__(self, learning_rate, use_gradient_accumulation=False, + pipeline_execution_with_tensor_core=True): + super(StochasticGradientDescentParameters, self).__init__( + learning_rate, use_gradient_accumulation, + pipeline_execution_with_tensor_core) + + +class TPUEmbedding(object): + """API for using TPU for embedding. + + Example: + ``` + table_config_user = tpu_embedding.TableConfig( + vocabulary_size=4, dimension=2, + initializer=initializer, combiner='mean') + table_to_config_dict = {'video': table_config_video, + 'user': table_config_user} + feature_to_table_dict = {'watched': 'video', + 'favorited': 'video', + 'friends': 'user'} + batch_size = 4 + num_hosts = 1 + optimization_parameters = tpu_embedding.AdagradParameters(1., 1.) + mode = tpu_embedding.TRAINING + embedding = tpu_embedding.TPUEmbedding( + table_to_config_dict, feature_to_table_dict, + batch_size, num_hosts, mode, optimization_parameters) + + batch_size_per_core = embedding.batch_size_per_core + sparse_features_list = [] + for host in hosts: + with ops.device(host): + for _ in range(embedding.num_cores_per_host): + sparse_features = {} + sparse_features['watched'] = sparse_tensor.SparseTensor(...) + sparse_features['favorited'] = sparse_tensor.SparseTensor(...) + sparse_features['friends'] = sparse_tensor.SparseTensor(...) 
+ sparse_features_list.append(sparse_features) + + enqueue_ops = embedding.generate_enqueue_ops(sparse_features_list) + + def computation(): + activations = embedding.get_activations() + loss = compute_loss(activations) + + base_optimizer = gradient_descent.GradientDescentOptimizer( + learning_rate=1) + cross_shard_optimizer = tpu_optimizer.CrossShardOptimizer( + base_optimizer) + + train_op = cross_shard_optimizer.minimize(loss) + # `train_op` and `send_gradients_op` must happen in order. + with ops.control_dependencies([train_op]): + send_gradients_op = embedding.generate_send_gradients_op() + with ops.control_dependencies([send_gradients_op]): + loss = array_ops.identity(loss) + + loss = tpu.shard(computation, + num_shards=embedding.num_cores) + + with self.test_session() as sess: + sess.run(tpu.initialize_system(embedding_config= + embedding.config_proto)) + sess.run(variables.global_variables_initializer()) + sess.run(embedding.init_ops) + sess.run(enqueue_ops) + loss_val = sess.run(loss) + ``` + """ + + # TODO(shizhiw): Instead of `feature_to_table_dict` which maps to table + # name, consider `feature_to_config_dict` which maps to `FeatureConfig`. + # `FeatureConfig` could have fields other than table name. For example, it + # could have a field to indicate that the feature should not be used to + # update embedding table (cr/204852758, cr/204940540). Also, this can support + # different combiners for different features within the same table. + + # TODO(shizhiw): will it be cleaner to make `table_to_config_dict` and + # `feature_to_table_dict` lists of `TableSpec` and `FeatureSpec` respectively? + + # TODO(shizhiw): Consider adding `input_fn` as an option to remove boilerplate + # for-loops around construction of inputs. + + # `optimization_parameter` applies to all tables. If the need arises, + # we can add `optimization_parameters` to `TableConfig` to override this + # global setting. 
+ @experimental + def __init__(self, + table_to_config_dict, + feature_to_table_dict, + batch_size, + num_hosts, + mode, + optimization_parameters=None, + tpu_embedding_test=False): + """API for using TPU for embedding lookups. + + Args: + table_to_config_dict: A dictionary mapping from string of table name to + `TableConfig`. Table refers to an embedding table, e.g. `params` + argument to `tf.nn.embedding_lookup_sparse()`. + feature_to_table_dict: A dictionary mapping from string of feature name + to string of table name. Feature refers to ids to lookup in embedding + table, e.g. `sp_ids` argument to `tf.nn.embedding_lookup_sparse()`. + batch_size: An `int` representing the global batch size. + num_hosts: An `int` representing the number of TPU hosts. + mode: `TRAINING` or `INFERENCE`. + optimization_parameters: `AdagradParameters`, `AdamParameters`, + `StochasticGradientDescentParameters`. Must be set in training and must + be `None` in inference. + tpu_embedding_test: A `bool`. Only used for testing. + + Raises: + ValueError: if any input is invalid. + """ + _validate_table_to_config_dict(table_to_config_dict) + # Avoid nondeterminism from `Dict` iteration order by using `OrderedDict`.
+ self._table_to_config_dict = _create_ordered_dict(table_to_config_dict) + self._combiners = _create_combiners(self._table_to_config_dict) + + _validate_feature_to_table_dict(table_to_config_dict, feature_to_table_dict) + self._feature_to_table_dict = _create_ordered_dict(feature_to_table_dict) + self._table_to_features_dict = _create_table_to_features_dict( + self._feature_to_table_dict) + + self._batch_size = batch_size + + if tpu_embedding_test: + self._num_hosts = 1 + self._hosts = [_TEST_HOST] + self._num_cores_per_host = _TEST_NUM_CORES_PER_HOST + else: + self._num_hosts = num_hosts + self._hosts = [_HOST_PATTERN.format(i) for i in range(self._num_hosts)] + self._num_cores_per_host = _NUM_CORES_PER_HOST + self._num_cores = self._num_cores_per_host * self._num_hosts + + _validate_batch_size(self._batch_size, self._num_cores) + self._batch_size_per_core = self._batch_size // self._num_cores + + self._init_ops = [] + + # TODO(shizhiw): remove `mode`? + if mode == TRAINING: + _validate_optimization_parameters(optimization_parameters) + self._optimization_parameters = optimization_parameters + elif mode == INFERENCE: + if optimization_parameters is not None: + raise ValueError('`optimization_parameters` should be `None` ' + 'for inference mode.') + self._optimization_parameters = ( + StochasticGradientDescentParameters(1.)) + else: + raise ValueError('`mode` only supports {} and {}; got {}.' + .format(TRAINING, INFERENCE, mode)) + self._mode = mode + + # TODO(shizhiw): move `optimization_parameters` into `_optimizer_handler` + # and create special handler for inference that inherits from + # StochasticGradientDescentHandler with more user-friendly error message + # on get_slot(). 
+ self._optimizer_handler = _get_optimization_handler( + self._optimization_parameters) + + dummy_table_variables_init_op = self._create_dummy_table_variables() + self._init_ops.append(dummy_table_variables_init_op) + + self._config_proto = self._create_config_proto() + + self._create_variables_and_ops() + self._init_ops.extend(self._load_parameters_ops) + + @property + def hosts(self): + """A list of device names for CPU hosts. + + Returns: + A list of device names for CPU hosts. + """ + return self._hosts + + # TODO(shizhiw): change to num_tensor_cores_per_host to be more explicit and + # to be consistent with `tpu_embedding_configuration.proto`. + @property + def num_cores_per_host(self): + """Number of TPU cores on a CPU host. + + Returns: + Number of TPU cores on a CPU host. + """ + return self._num_cores_per_host + + @property + def num_cores(self): + """Total number of TPU cores on all hosts. + + Returns: + Total number of TPU cores on all hosts. + """ + return self._num_cores + + @property + def batch_size_per_core(self): + """Batch size for each TPU core. + + The sparse tensors in `sparse_features_list` to `generate_enqueue_ops` + must have batch dimension equal to this. + + Returns: + Batch size for each TPU core. + """ + return self._batch_size_per_core + + @property + def config_proto(self): + """Create embedding config proto for `tpu.initialize_system()`. + + Returns: + an `TPUEmbeddingConfiguration` proto describing the desired + configuration of the hardware embedding lookup tables, which + is passed to `tpu.initialize_system()`. + """ + return self._config_proto + + @property + def init_ops(self): + """Initialization ops for TPU embedding. + + It must be called after all global variables have been initialized, + i.e. after `global_variables_initializer()`, as it loads embedding + tables into TPU. + + Returns: + A list of ops. + """ + return self._init_ops + + # TODO(shizhiw): get table variables the same way as getting slot variables. 
+ @property + def table_to_table_variables_dict(self): + return copy.copy(self._table_to_table_variables_dict) + + def get_slot_names(self): + """Return a list of the names of slots created by `TPUEmbedding`.""" + return self._optimizer_handler.get_slot_names() + + def get_slot(self, table, name): + """Return a slot named `name` created for `table` by `TPUEmbedding`.""" + return self._optimizer_handler.get_slot(table, name) + + # TODO(shizhiw): expose load to user too? + @property + def retrieve_parameters_ops(self): + return self._retrieve_parameters_ops + + def _create_config_proto(self): + """Create `TPUEmbeddingConfiguration`.""" + config_proto = elc.TPUEmbeddingConfiguration() + for table in self._table_to_config_dict: + table_descriptor = config_proto.table_descriptor.add() + table_descriptor.name = table + + table_config = self._table_to_config_dict[table] + table_descriptor.vocabulary_size = table_config.vocabulary_size + table_descriptor.dimension = table_config.dimension + + features_for_table = self._table_to_features_dict[table] + table_descriptor.num_features = len(features_for_table) + + table_descriptor.optimization_parameters.learning_rate.constant = ( + self._optimization_parameters.learning_rate) + table_descriptor.optimization_parameters.use_gradient_accumulation = ( + self._optimization_parameters.use_gradient_accumulation) + self._optimizer_handler.set_optimization_parameters(table_descriptor) + + config_proto.mode = self._mode + config_proto.batch_size_per_tensor_core = self._batch_size_per_core + config_proto.num_hosts = self._num_hosts + config_proto.num_tensor_cores = self._num_cores + config_proto.sharding_strategy = elc.TPUEmbeddingConfiguration.DIV_DEFAULT + config_proto.pipeline_execution_with_tensor_core = ( + self._optimization_parameters.pipeline_execution_with_tensor_core) + + return config_proto + + def _create_variables_and_ops(self): + """Create embedding variables and ops to load them into TPU.""" +
self._load_parameters_ops = [] + self._retrieve_parameters_ops = [] + self._table_to_table_variables_dict = {} + for table in self._table_to_config_dict: + device_fn = _create_device_fn(self._hosts) + with ops.device(device_fn): + # TODO(shizhiw): allow user to specify variable name so that + # they could make the name consistent with CPU etc. + variable_name = table + table_variables = _create_partitioned_variables( + name=variable_name, + num_hosts=self._num_hosts, + vocabulary_size=self._table_to_config_dict[table].vocabulary_size, + embedding_dimension=self._table_to_config_dict[table].dimension, + initializer=self._table_to_config_dict[table].initializer, + collections=[ops.GraphKeys.GLOBAL_VARIABLES]) + self._table_to_table_variables_dict[table] = table_variables + + self._optimizer_handler.create_variables_and_ops( + table, variable_name, self._num_hosts, + self._table_to_config_dict[table], table_variables, + self._load_parameters_ops, self._retrieve_parameters_ops) + + def _create_dummy_table_variables(self): + """Create dummy embedding table variables. + + The sole purpose of these dummy variables are to trigger gradient + calcuation wrt them so that the gradients wrt activation can be captured + and later sent to TPU embedding. + + Returns: + Initializer for these variables. + + Raises: + RuntimeError: if collection to store gradients already exists and is not + empty. + """ + self._dummy_table_variables = [] + # TODO(shizhiw): remove table id. + for table_id, table in enumerate(self._table_to_features_dict): + self._dummy_table_variables.append( + variable_scope.get_variable( + 'tpu_embedding_dummy_table_variable_%s' % table, + dtype=dtypes.float32, + shape=[1], + use_resource=True, + trainable=True, + # TODO(shizhiw): Remove these dummy variables as + # tensorflow optimizer creates slot variable for them which + # is undesirable. + # e.g. tpu_embedding_dummy_table_variable_mlp_user/Adam{_1}. 
+ # Explicitly specifying collections prevents this variable from + # being added to the GLOBAL_VARIABLES collection, so that Saver() + # ignores it. + collections=['tpu_embedding_dummy_table_variables'])) + + g = ops.get_default_graph() + table_gradients = g.get_collection_ref( + 'tpu_embedding_gradients_table_%d' % table_id) + if table_gradients: + raise RuntimeError( + 'tpu_embedding_gradients_table_%d is not empty.' % table_id) + table_gradients.extend([None] * len(self._table_to_features_dict[table])) + + return variables.variables_initializer( + self._dummy_table_variables, + name='tpu_embedding_dummy_table_variables_init') + + def generate_enqueue_ops(self, sparse_features_list): + """Generate enqueue ops. + + Args: + sparse_features_list: a list of dictionary mapping from string + of feature names to sparse tensor. Each dictionary is for one + TPU core. Dictionaries for the same core should be contiguous + on the list. + + Returns: + Ops to enqueue to TPU for embedding. + """ + self._validate_generate_enqueue_ops_sparse_features_list( + sparse_features_list) + return [ + self._generate_enqueue_op( + sparse_features, device_ordinal=i % self._num_cores_per_host) + for i, sparse_features in enumerate(sparse_features_list) + ] + + def _validate_generate_enqueue_ops_sparse_features_list( + self, sparse_features_list): + """Validate `sparse_features_list`.""" + if len(sparse_features_list) != self._num_cores: + raise ValueError('Length of `sparse_features_list` should match the ' + 'number of cores; ' + '`len(sparse_features_list)` is {}, ' + 'number of cores is {}.'.format( + len(sparse_features_list), self._num_cores)) + + feature_set = set(self._feature_to_table_dict.keys()) + contiguous_device = None + for i, sparse_features in enumerate(sparse_features_list): + used_feature_set = set(sparse_features.keys()) + + # Check features are valid. 
+ missing_feature_set = feature_set - used_feature_set + if missing_feature_set: + raise ValueError('`sparse_features_list[{}]` misses a feature that is ' + 'in `feature_to_config_dict`: {}.'.format( + i, missing_feature_set)) + + extra_feature_set = used_feature_set - feature_set + if extra_feature_set: + raise ValueError('`sparse_features_list[{}]` has a feature that is not ' + 'in `feature_to_config_dict`: {}.'.format( + i, extra_feature_set)) + + device = None + device_feature = None + for feature, tensor in six.iteritems(sparse_features): + if not isinstance(tensor, sparse_tensor.SparseTensor): + raise ValueError('`sparse_features_list[{}]` has a feature that is ' + 'not mapped to `SparseTensor`. ' + '`feature`: {}, type: {}'.format( + i, feature, type(tensor))) + + # Check all features are on the same device. + if device is None: + device = tensor.op.device + device_feature = feature + else: + if device != tensor.op.device: + raise ValueError('Devices are different between features in ' + '`sparse_features_list[{}]`; ' + 'devices: {}, {}; features: {}, {}.'.format( + i, device, tensor.op.device, feature, + device_feature)) + + if i % self._num_cores_per_host: + if device != contiguous_device: + raise ValueError('We expect the `sparse_features` which are on the ' + 'same host to be contiguous in ' + '`sparse_features_list`, ' + '`sparse_features_list[{}]` is on device {}, ' + 'but is expected to be on device {}.'.format( + i, device, contiguous_device)) + else: + contiguous_device = device + + def _generate_enqueue_op(self, sparse_features, device_ordinal): + with ops.colocate_with(sparse_features.values()[0]): + sample_idcs, embedding_idcs, aggregation_weights = ( + self._format_for_tpu_embedding_sparse_batch(sparse_features)) + return tpu_ops.enqueue_tpu_embedding_sparse_batch( + sample_idcs, + embedding_idcs, + aggregation_weights, + combiners=self._combiners, + device_ordinal=device_ordinal) + + def _format_for_tpu_embedding_sparse_batch(self, 
sparse_features): + """Format sparse features for `enqueue_tpu_embedding_sparse_batch()`. + + Args: + sparse_features: a `Dict` of `SparseTensor`s for embedding. + + Returns: + Arguments for `enqueue_tpu_embedding_sparse_batch()`. + """ + + sample_idcs, embedding_idcs, aggregation_weights = list(), list(), list() + for table in self._table_to_features_dict: + sample_t, indices_t, weights_t = list(), list(), list() + + features = self._table_to_features_dict[table] + for i, feature in enumerate(features): + tensor = sparse_features[feature] + sample_indices = tensor.indices[:, 0] + embedding_indices = tensor.values + weights = array_ops.ones_like(embedding_indices) + sample_t.append(i * self._batch_size_per_core + sample_indices) + indices_t.append(embedding_indices) + weights_t.append(weights) + + sample_idcs.append( + math_ops.cast(array_ops.concat(sample_t, axis=0), dtype=dtypes.int32)) + embedding_idcs.append( + math_ops.cast( + array_ops.concat(indices_t, axis=0), dtype=dtypes.int32)) + aggregation_weights.append( + math_ops.cast( + array_ops.concat(weights_t, axis=0), dtype=dtypes.float32)) + + return sample_idcs, embedding_idcs, aggregation_weights + + def get_activations(self): + """Get activations for features. + + This should be called within `computation` that is passed to + `tpu.replicate` and friends. + + Returns: + A dictionary mapping from `String` of feature name to `Tensor` + of activation. 
+ """ + recv_activations = tpu_ops.recv_tpu_embedding_activations( + num_outputs=len(self._table_to_config_dict), + config=self._config_proto.SerializeToString()) + + activations = collections.OrderedDict() + for table_id, table in enumerate(self._table_to_features_dict): + features = self._table_to_features_dict[table] + for lookup_id, feature in enumerate(features): + start_row = lookup_id * self._batch_size_per_core + end_row = start_row + self._batch_size_per_core + activations[feature] = gen_tpu_ops.tpu_embedding_activations( + self._dummy_table_variables[table_id], + recv_activations[table_id][start_row:end_row, :], + table_id=table_id, + lookup_id=lookup_id) + return activations + + # TODO(shizhiw): Make `gradient_multiplier` per feature. Setting it to 0 would + # have the effect of `tf.stop_gradients()`. + # TODO(shizhiw): Consider alternative ways to capture gradients wrt embedding + # layer outputs to remove `_dummy_table_variables`, + # `_embedding_activation_grad` and `tpu_embedding_gradients_table_%d'. + def generate_send_gradients_op(self, gradient_multipliers=None): + """Retrieve gradients from collections and send them to TPU embedding. + + Args: + gradient_multipliers: None, or dict mapping table names to gradient + multiplier Tensors. + + Returns: + SendTPUEmbeddingGradients Op. + + Raises: + ValueError: If required gradients have not been defined. + RuntimeError: If `mode` is not `TRAINING`. + """ + if self._mode != TRAINING: + raise RuntimeError('Only in training mode gradients need to ' + 'be sent to TPU embedding; got mode {}.' 
+ .format(self._mode)) + + g = ops.get_default_graph() + gradients = list() + for table_id, table in enumerate(self._table_to_config_dict): + table_gradients = g.get_collection( + 'tpu_embedding_gradients_table_%d' % table_id) + if any(gradient is None for gradient in table_gradients): + raise ValueError( + 'Table {}/{} has undefined gradients: this is probably because the ' + 'model asked TPUEmbedding to compute activations that were not ' + 'used.'.format(table_id, table)) + concat_table_grads = array_ops.concat(table_gradients, axis=0) + if gradient_multipliers is not None: + concat_table_grads *= gradient_multipliers[table.name] + gradients.append(concat_table_grads) + + return tpu_ops.send_tpu_embedding_gradients( + inputs=gradients, config=self.config_proto.SerializeToString()) + + +def _validate_table_to_config_dict(table_to_config_dict): + """Validate `table_to_config_dict`.""" + for k, v in six.iteritems(table_to_config_dict): + if not isinstance(v, TableConfig): + raise ValueError('Value of `table_to_config_dict` must be of type ' + '`TableConfig`, got {} for {}.'.format(type(v), k)) + + +def _validate_feature_to_table_dict(table_to_config_dict, + feature_to_table_dict): + """Validate `feature_to_table_dict`.""" + used_table_set = set(feature_to_table_dict.values()) + table_set = set(table_to_config_dict.keys()) + + unused_table_set = table_set - used_table_set + if unused_table_set: + raise ValueError('`table_to_config_dict` specifies table that is not ' + 'used in `feature_to_table_dict`: {}.' + .format(unused_table_set)) + + extra_table_set = used_table_set - table_set + if extra_table_set: + raise ValueError('`feature_to_table_dict` refers to a table that is not ' + 'specified in `table_to_config_dict`: {}.' + .format(extra_table_set)) + + +def _validate_batch_size(batch_size, num_cores): + if batch_size % num_cores: + raise ValueError('`batch_size` is not a multiple of number of ' + 'cores. 
class _OptimizerHandler(object):
  """Interface class for handling optimizer specific logic.

  Every method other than `__init__` raises `NotImplementedError` and is meant
  to be overridden by a concrete, optimizer specific handler.
  """

  def __init__(self, optimization_parameters):
    """Remember the optimization parameters for use by subclasses."""
    self._optimization_parameters = optimization_parameters

  def set_optimization_parameters(self, table_descriptor):
    """Set optimizer specific fields on `table_descriptor`."""
    raise NotImplementedError()

  def create_variables_and_ops(self, table, variable_name, num_hosts=None,
                               table_config=None, table_variables=None,
                               load_parameters_ops=None,
                               retrieve_parameters_ops=None):
    """Create optimizer variables and load/retrieve ops for `table`.

    The parameter list mirrors the concrete handlers and the positional call
    made by the embedding class, so that reaching this base implementation
    raises `NotImplementedError` rather than a `TypeError` about the number of
    arguments. The trailing parameters default to None only to keep the old
    two-argument call form working.

    Args:
      table: name of the embedding table.
      variable_name: name used for the table's variables.
      num_hosts: number of hosts the variables are partitioned over.
      table_config: configuration object of the table.
      table_variables: the table's partitioned variables.
      load_parameters_ops: list that load ops are appended to.
      retrieve_parameters_ops: list that retrieve ops are appended to.

    Raises:
      NotImplementedError: always; subclasses must override.
    """
    raise NotImplementedError()

  def get_slot_names(self):
    """Return the names of the slots the optimizer creates."""
    raise NotImplementedError()

  def get_slot(self, table, name):
    """Return the slot called `name` for `table`."""
    raise NotImplementedError()
accumulator_variable in (zip( + range(num_hosts), table_variables, accumulator_variables)): + with ops.colocate_with(table_variable): + load_parameters_op = ( + tpu_ops.load_tpu_embedding_adagrad_parameters( + parameters=table_variable, + accumulators=accumulator_variable, + table_name=table, + num_shards=num_hosts, + shard_id=host_id)) + retrieved_table, retrieved_accumulator = ( + tpu_ops.retrieve_tpu_embedding_adagrad_parameters( + table_name=table, + num_shards=num_hosts, + shard_id=host_id)) + retrieve_parameters_op = control_flow_ops.group( + state_ops.assign(table_variable, retrieved_table), + state_ops.assign(accumulator_variable, retrieved_accumulator)) + + load_parameters_ops.append(load_parameters_op) + retrieve_parameters_ops.append(retrieve_parameters_op) + + def get_slot_names(self): + return ['accumulator'] + + def get_slot(self, table, name): + if name not in self.get_slot_names(): + raise ValueError('Adagrad has {} as slot names; got {}.' + .format(self.get_slot_names(), name)) + return self._table_to_accumulator_variables_dict[table] + + +class _AdamHandler(_OptimizerHandler): + """Handles Adam specific logic.""" + + def __init__(self, optimization_parameters): + super(_AdamHandler, self).__init__(optimization_parameters) + self._table_to_m_variables_dict = {} + self._table_to_v_variables_dict = {} + + def set_optimization_parameters(self, table_descriptor): + table_descriptor.optimization_parameters.adam.beta1 = ( + self._optimization_parameters.beta1) + table_descriptor.optimization_parameters.adam.beta2 = ( + self._optimization_parameters.beta2) + table_descriptor.optimization_parameters.adam.epsilon = ( + self._optimization_parameters.epsilon) + + def create_variables_and_ops(self, table, variable_name, num_hosts, + table_config, table_variables, + load_parameters_ops, retrieve_parameters_ops): + optimizer_name = 'Adam' + m_initializer = init_ops.zeros_initializer() + m_variables = _create_partitioned_variables( + name='%s/%s/m' % 
(variable_name, optimizer_name), + num_hosts=num_hosts, + vocabulary_size=table_config.vocabulary_size, + embedding_dimension=table_config.dimension, + collections=[ops.GraphKeys.GLOBAL_VARIABLES], + initializer=m_initializer) + v_initializer = init_ops.zeros_initializer() + v_variables = _create_partitioned_variables( + name='%s/%s/v' % (variable_name, optimizer_name), + num_hosts=num_hosts, + vocabulary_size=table_config.vocabulary_size, + embedding_dimension=table_config.dimension, + collections=[ops.GraphKeys.GLOBAL_VARIABLES], + initializer=v_initializer) + + self._table_to_m_variables_dict[table] = m_variables + self._table_to_v_variables_dict[table] = v_variables + + for host_id, table_variable, m_variable, v_variable in (zip( + range(num_hosts), table_variables, + m_variables, v_variables)): + with ops.colocate_with(table_variable): + load_parameters_op = ( + tpu_ops.load_tpu_embedding_adam_parameters( + parameters=table_variable, + momenta=m_variable, + velocities=v_variable, + table_name=table, + num_shards=num_hosts, + shard_id=host_id)) + retrieved_table, retrieved_m, retrieved_v = ( + tpu_ops.retrieve_tpu_embedding_adam_parameters( + table_name=table, + num_shards=num_hosts, + shard_id=host_id)) + retrieve_parameters_op = control_flow_ops.group( + state_ops.assign(table_variable, retrieved_table), + state_ops.assign(m_variable, retrieved_m), + state_ops.assign(v_variable, retrieved_v)) + + load_parameters_ops.append(load_parameters_op) + retrieve_parameters_ops.append(retrieve_parameters_op) + + def get_slot_names(self): + return ['m', 'v'] + + def get_slot(self, table, name): + if name == 'm': + return self._table_to_m_variables_dict[table] + elif name == 'v': + return self._table_to_v_variables_dict[table] + else: + raise ValueError('Adam has {} as slot names; got {}.' 
def _get_optimization_handler(optimization_parameters):
  """Return the optimizer handler matching `optimization_parameters`.

  Args:
    optimization_parameters: an `AdagradParameters`, `AdamParameters` or
      `StochasticGradientDescentParameters` instance.

  Returns:
    The `_AdagradHandler`, `_AdamHandler` or
    `_StochasticGradientDescentHandler` built from `optimization_parameters`.

  Raises:
    NotImplementedError: if `optimization_parameters` is none of the types
      listed above.
  """
  if isinstance(optimization_parameters, AdagradParameters):
    return _AdagradHandler(optimization_parameters)
  if isinstance(optimization_parameters, AdamParameters):
    return _AdamHandler(optimization_parameters)
  if isinstance(optimization_parameters, StochasticGradientDescentParameters):
    return _StochasticGradientDescentHandler(optimization_parameters)
  # Raise instead of returning the exception object: a returned exception
  # would be stored as the handler and only fail later on first use.
  raise NotImplementedError(
      'No optimizer handler is available for `optimization_parameters` of '
      'type {}.'.format(type(optimization_parameters)))
def _create_partitioned_variables(name,
                                  num_hosts,
                                  vocabulary_size,
                                  embedding_dimension,
                                  initializer,
                                  collections=None):  # pylint: disable=redefined-outer-name
  """Creates PartitionedVariables based on `num_hosts` for `table`.

  The variable has shape `(vocabulary_size, embedding_dimension)`, dtype
  float32, is not trainable, and is sliced into `num_hosts` pieces along the
  vocabulary axis.

  Args:
    name: name passed to `create_partitioned_variables()`.
    num_hosts: number of partitions along the vocabulary axis.
    vocabulary_size: number of rows of the table.
    embedding_dimension: number of columns of the table.
    initializer: initializer passed to `create_partitioned_variables()`.
    collections: collections passed to `create_partitioned_variables()`.

  Returns:
    The result of `partitioned_variables.create_partitioned_variables()`.

  Raises:
    ValueError: if `vocabulary_size` is smaller than `num_hosts`.
  """
  # TODO(shizhiw): automatically place embedding lookup elsewhere?
  if vocabulary_size < num_hosts:
    raise ValueError('`vocabulary_size`({}) is smaller than `num_hosts`({}). '
                     'As TPU embedding is not optimized for small tables, '
                     'please consider other ways for this embedding lookup.'
                     .format(vocabulary_size, num_hosts))

  slicing = [num_hosts, 1]

  # TODO(shizhiw): deprecated, use tf.get_variable()?
  return partitioned_variables.create_partitioned_variables(
      name=name,
      slicing=slicing,
      shape=(vocabulary_size, embedding_dimension),
      dtype=dtypes.float32,
      initializer=initializer,
      collections=collections,
      trainable=False)
+ array_ops.zeros(arg.shape, dtype=dtypes.float32) + for arg in activations_op.inputs + ] diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 8d15c857f8a05e99b0f1106cba2a2d36824e5cac..a00acdcbcee38df2be4bc3c50f47137f247ff410 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -471,8 +471,10 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): def after_create_session(self, session, coord): logging.info('Init TPU system') + start = time.time() session.run(self._init_ops, options=config_pb2.RunOptions(timeout_in_ms=5 * 60 * 1000)) + logging.info('Initialized TPU in %d seconds', time.time() - start) self._infeed_controller = self._create_infeed_controller( name='InfeedController', target=self._run_infeed, args=(session,)) @@ -480,6 +482,12 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): self._outfeed_controller = _OpQueueContext( name='OutfeedController', target=self._run_outfeed, args=(session,)) + # Enable the worker watchdog to terminate workers on coordinator exit. 
+ watchdog_timeout = int(os.environ.get('TF_TPU_WATCHDOG_TIMEOUT', '0')) + if watchdog_timeout > 0: + session_support.start_worker_watchdog(session, + shutdown_timeout=watchdog_timeout) + def before_run(self, run_context): self._feed_error = None @@ -694,7 +702,7 @@ def generate_per_host_enqueue_ops_fn_for_host( """Generates infeed enqueue ops for per-host input_fn on a single host.""" captured_infeed_queue = _CapturedObject() - hooks = [] + dataset_initializer = None with ops.device(device): user_context = tpu_context.TPUContext( @@ -716,7 +724,7 @@ def generate_per_host_enqueue_ops_fn_for_host( add_padding=True) if is_dataset: - hooks.append(inputs.dataset_initializer_hook()) + dataset_initializer = inputs.dataset_initializer() tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id) @@ -762,14 +770,14 @@ def generate_per_host_enqueue_ops_fn_for_host( 'signals': signals, } - return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset + return enqueue_ops_fn, captured_infeed_queue, dataset_initializer def generate_per_host_v2_enqueue_ops_fn_for_host( ctx, input_fn, inputs_structure_recorder, device, host_id): """Generates infeed enqueue ops for per-host input_fn on a single host.""" captured_infeed_queue = _CapturedObject() - hooks = [] + dataset_initializer = None with ops.device(device): user_context = tpu_context.TPUContext( @@ -790,7 +798,7 @@ def generate_per_host_v2_enqueue_ops_fn_for_host( add_padding=True, num_invocations_per_step=ctx.num_of_replicas_per_host) - hooks.append(inputs.dataset_initializer_hook()) + dataset_initializer = inputs.dataset_initializer() tpu_ordinal_function_impl = ctx.tpu_ordinal_function(host_id) def enqueue_ops_fn(): @@ -851,14 +859,14 @@ def generate_per_host_v2_enqueue_ops_fn_for_host( 'signals': signals, } - return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset + return enqueue_ops_fn, captured_infeed_queue, dataset_initializer def generate_broadcast_enqueue_ops_fn(ctx, input_fn, inputs_structure_recorder, 
num_hosts): """Generates infeed enqueue ops for one input_fn on all the hosts.""" captured_infeed_queue = _CapturedObject() - hooks = [] + dataset_initializer = None device_0 = ctx.tpu_host_placement_function(host_id=0) with ops.device(device_0): user_context = tpu_context.TPUContext( @@ -878,7 +886,7 @@ def generate_broadcast_enqueue_ops_fn(ctx, input_fn, inputs_structure_recorder, add_padding=True) if is_dataset: - hooks.append(inputs.dataset_initializer_hook()) + dataset_initializer = inputs.dataset_initializer() num_replicas_per_host = ctx.num_of_replicas_per_host def tpu_ordinal_function_impl(replica_id): @@ -929,7 +937,7 @@ def generate_broadcast_enqueue_ops_fn(ctx, input_fn, inputs_structure_recorder, 'signals': signals, } - return enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset + return enqueue_ops_fn, captured_infeed_queue, dataset_initializer class _InputPipeline(object): @@ -1133,7 +1141,7 @@ class _InputPipeline(object): """Deploys the input pipeline and record input structure.""" enqueue_ops = [] infeed_queues = [] - all_hooks = [] + all_dataset_initializers = [] num_hosts = self._ctx.num_hosts tpu_host_placement_fn = self._ctx.tpu_host_placement_function @@ -1165,12 +1173,12 @@ class _InputPipeline(object): elif self._ctx.is_input_broadcast_with_iterators(): # Only calls input_fn in host 0. 
host_device = tpu_host_placement_fn(host_id=0) - enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = ( + enqueue_ops_fn, captured_infeed_queue, dataset_initializer = ( generate_broadcast_enqueue_ops_fn(self._ctx, self._input_fn, self._inputs_structure_recorder, num_hosts)) - all_hooks.extend(hooks) - if is_dataset: + if dataset_initializer: + all_dataset_initializers.append(dataset_initializer) run_infeed_loop_on_coordinator = False wrap_fn = ( _wrap_computation_in_while_loop @@ -1186,17 +1194,16 @@ class _InputPipeline(object): with ops.device(host_device): with ops.name_scope('input_pipeline_task%d' % (host_id)): if self._ctx.is_input_per_host_with_iterators(): - enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = ( + enqueue_ops_fn, captured_infeed_queue, dataset_initializer = ( generate_per_host_v2_enqueue_ops_fn_for_host( self._ctx, self._input_fn, self._inputs_structure_recorder, host_device, host_id)) else: - enqueue_ops_fn, captured_infeed_queue, hooks, is_dataset = ( + enqueue_ops_fn, captured_infeed_queue, dataset_initializer = ( generate_per_host_enqueue_ops_fn_for_host( self._ctx, self._input_fn, self._inputs_structure_recorder, self._batch_axis, host_device, host_id)) - all_hooks.extend(hooks) # NOTE(xiejw): We dispatch here based on the return type of the # users `input_fn`. @@ -1210,7 +1217,8 @@ class _InputPipeline(object): # handled in TF control flow properly. In this case, we will use # python loop to enqueue the data into TPU system. This may be # slow compared to the previous case. - if is_dataset: + if dataset_initializer: + all_dataset_initializers.append(dataset_initializer) run_infeed_loop_on_coordinator = False wrap_fn = ( _wrap_computation_in_while_loop @@ -1225,7 +1233,9 @@ class _InputPipeline(object): # dequeue is dtypes and types. So, any one can be used. Here, grab the # first one. 
self._infeed_queue = infeed_queues[0] - return enqueue_ops, all_hooks, run_infeed_loop_on_coordinator + return enqueue_ops, [ + estimator_util.MultiHostDatasetInitializerHook(all_dataset_initializers) + ], run_infeed_loop_on_coordinator def _validate_input_pipeline(self): """Validates the input pipeline. @@ -1794,19 +1804,18 @@ class ExamplesPerSecondHook(basic_session_run_hooks.StepCounterHook): summary_writer=summary_writer) def _log_and_record(self, elapsed_steps, elapsed_time, global_step): - global_steps_per_sec = elapsed_steps / elapsed_time - examples_per_sec = self._batch_size * global_steps_per_sec + global_step_per_sec = elapsed_steps / elapsed_time + examples_per_sec = self._batch_size * global_step_per_sec if self._summary_writer is not None: global_step_summary = Summary(value=[ - Summary.Value(tag='global_steps/sec', - simple_value=global_steps_per_sec) + Summary.Value(tag='global_step/sec', simple_value=global_step_per_sec) ]) example_summary = Summary(value=[ Summary.Value(tag='examples/sec', simple_value=examples_per_sec) ]) self._summary_writer.add_summary(global_step_summary, global_step) self._summary_writer.add_summary(example_summary, global_step) - logging.info('global_steps/sec: %g', global_steps_per_sec) + logging.info('global_step/sec: %g', global_step_per_sec) logging.info('examples/sec: %g', examples_per_sec) @@ -3047,23 +3056,19 @@ class _Inputs(object): """Returns True if the return value from input_fn is Dataset.""" return self._dataset is not None - def dataset_initializer_hook(self): - """Returns a `SessionRunHook` to initialize this dataset. + def dataset_initializer(self): + """Returns the dataset's initializer. - This must be called before `features_and_labels`. + The initializer must be run before calling `features_and_labels`. 
""" - iterator = self._dataset.make_initializable_iterator() - # pylint: disable=protected-access - hook = estimator_util._DatasetInitializerHook(iterator) - # pylint: enable=protected-access - self._iterator = iterator - return hook + self._iterator = self._dataset.make_initializable_iterator() + return self._iterator.initializer def features_and_labels(self): """Gets `features` and `labels`.""" if self.is_dataset: if self._iterator is None: - raise RuntimeError('Internal error: Must call dataset_initializer_hook ' + raise RuntimeError('Internal error: Must run dataset_initializer ' 'before calling features_and_labels(). Please file ' 'a bug!') return _Inputs._parse_inputs(self._iterator.get_next()) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py index bd530fdc3aaf585680ac94e1535051ae4156a925..3786e52b949dfac8c1587d1ea3041b625f00183f 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator_signals_test.py @@ -100,7 +100,7 @@ class TPUEstimatorStoppingSignalsTest(test.TestCase): with ops.Graph().as_default(): dataset = input_fn(params) inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size) - hook = inputs.dataset_initializer_hook() + dataset_initializer = inputs.dataset_initializer() features, _ = inputs.features_and_labels() signals = inputs.signals() @@ -108,8 +108,7 @@ class TPUEstimatorStoppingSignalsTest(test.TestCase): self.assertIsNone(features['a'].shape.as_list()[0]) with session.Session() as sess: - hook.begin() - hook.after_create_session(sess, coord=None) + sess.run(dataset_initializer) result, evaluated_signals = sess.run([features, signals]) self.assertAllEqual(a[:batch_size], result['a']) @@ -143,7 +142,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase): dataset = input_fn(params) inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size, 
add_padding=True) - hook = inputs.dataset_initializer_hook() + dataset_initializer = inputs.dataset_initializer() features, _ = inputs.features_and_labels() signals = inputs.signals() @@ -151,8 +150,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase): self.assertEqual(batch_size, features['a'].shape.as_list()[0]) with session.Session() as sess: - hook.begin() - hook.after_create_session(sess, coord=None) + sess.run(dataset_initializer) result, evaluated_signals = sess.run([features, signals]) self.assertAllEqual(a[:batch_size], result['a']) @@ -187,7 +185,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase): dataset = input_fn(params) inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size, add_padding=True) - hook = inputs.dataset_initializer_hook() + dataset_initializer = inputs.dataset_initializer() features, labels = inputs.features_and_labels() signals = inputs.signals() @@ -195,8 +193,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase): self.assertEqual(batch_size, features['a'].shape.as_list()[0]) with session.Session() as sess: - hook.begin() - hook.after_create_session(sess, coord=None) + sess.run(dataset_initializer) evaluated_features, evaluated_labels, evaluated_signals = ( sess.run([features, labels, signals])) @@ -255,7 +252,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase): dataset = input_fn(params) inputs = tpu_estimator._InputsWithStoppingSignals(dataset, batch_size, add_padding=True) - hook = inputs.dataset_initializer_hook() + dataset_initializer = inputs.dataset_initializer() features, _ = inputs.features_and_labels() signals = inputs.signals() @@ -264,8 +261,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase): features, signals)) with session.Session() as sess: - hook.begin() - hook.after_create_session(sess, coord=None) + sess.run(dataset_initializer) result, evaluated_signals = sess.run([sliced_features, signals]) 
self.assertAllEqual(a[:batch_size], result['a']) @@ -297,7 +293,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase): dataset = input_fn(params) inputs = tpu_estimator._InputsWithStoppingSignals( dataset, batch_size, add_padding=True, num_invocations_per_step=2) - hook = inputs.dataset_initializer_hook() + dataset_initializer = inputs.dataset_initializer() features, _ = inputs.features_and_labels() signals = inputs.signals() @@ -305,8 +301,7 @@ class TPUEstimatorStoppingSignalsWithPaddingTest(test.TestCase): tpu_estimator._PaddingSignals.slice_tensor_or_dict(features, signals)) with session.Session() as sess: - hook.begin() - hook.after_create_session(sess, coord=None) + sess.run(dataset_initializer) result, evaluated_signals = sess.run([sliced_features, signals]) self.assertAllEqual(a[:batch_size], result['a']) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 900a0e11c4b5cb8d3c513255e3673aff03dcdb21..7789ea22fc99fa5c37dbc0a9ee9033f6f1e93c82 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -73,12 +73,10 @@ licenses(["notice"]) # Apache 2.0 load( "//tensorflow:tensorflow.bzl", "cc_header_only_library", - "full_path", "if_android", "if_ios", "if_linux_x86_64", "if_mobile", - "if_not_mobile", "if_not_windows", "if_windows", "tf_cc_test", @@ -1156,6 +1154,21 @@ tf_gen_op_libs( ], ) +cc_library( + name = "ragged_ops", + deps = [ + ":ragged_array_ops_op_lib", + ":ragged_math_ops_op_lib", + ], +) + +tf_gen_op_libs( + op_lib_names = [ + "ragged_array_ops", + "ragged_math_ops", + ], +) + cc_library( name = "ops", visibility = ["//visibility:public"], @@ -1189,6 +1202,7 @@ cc_library( ":nn_ops_op_lib", ":no_op_op_lib", ":parsing_ops_op_lib", + ":ragged_ops", ":random_ops_op_lib", ":remote_fused_graph_ops_op_lib", ":resource_variable_ops_op_lib", @@ -1342,6 +1356,7 @@ cc_library( "//tensorflow/core/kernels:parameterized_truncated_normal_op", "//tensorflow/core/kernels:parsing", 
"//tensorflow/core/kernels:partitioned_function_ops", + "//tensorflow/core/kernels:ragged_ops", "//tensorflow/core/kernels:random_ops", "//tensorflow/core/kernels:random_poisson_op", "//tensorflow/core/kernels:remote_fused_graph_ops", @@ -2810,6 +2825,7 @@ CORE_CPU_LIB_HEADERS = CORE_CPU_BASE_HDRS + [ "common_runtime/graph_optimizer.h", "common_runtime/local_device.h", "common_runtime/lower_if_op.h", + "common_runtime/lower_if_while.h", "common_runtime/lower_while_op.h", "common_runtime/memory_types.h", "common_runtime/mkl_cpu_allocator.h", @@ -2865,6 +2881,7 @@ tf_cuda_library( "common_runtime/hierarchical_tree_broadcaster.cc", "common_runtime/local_device.cc", "common_runtime/lower_if_op.cc", + "common_runtime/lower_if_while.cc", "common_runtime/lower_while_op.cc", "common_runtime/memory_types.cc", "common_runtime/mkl_cpu_allocator.cc", @@ -4710,6 +4727,29 @@ tf_cc_tests( ], ) +tf_cc_tests( + name = "common_runtime_lower_if_while_test", + size = "small", + srcs = ["common_runtime/lower_if_while_test.cc"], + deps = [ + ":all_kernels", + ":core_cpu", + ":core_cpu_internal", + ":direct_session", + ":framework", + ":framework_internal", + ":lib", + ":test", + ":test_main", + ":testlib", + "//tensorflow/cc:cc_ops", + "//tensorflow/cc:cc_ops_internal", + "//tensorflow/cc:client_session", + "//tensorflow/cc:function_ops", + "//tensorflow/cc:ops", + ], +) + # Test data filegroup( name = "image_testdata", diff --git a/tensorflow/core/api_def/base_api/api_def_Dequantize.pbtxt b/tensorflow/core/api_def/base_api/api_def_Dequantize.pbtxt index 40c00ef58f8d9e6262023d6a3299fa5f6fbd8f2a..cd4cc5c906b2b2416d06d39c6176c4de99c979f5 100644 --- a/tensorflow/core/api_def/base_api/api_def_Dequantize.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Dequantize.pbtxt @@ -21,7 +21,7 @@ used to convert the float values to their quantized equivalents. 
In 'MIN_COMBINED' mode, each value of the tensor will undergo the following: ``` -if T == qint8, in[i] += (range(T) + 1)/ 2.0 +if T == qint8: in[i] += (range(T) + 1)/ 2.0 out[i] = min_range + (in[i]* (max_range - min_range) / range(T)) ``` here `range(T) = numeric_limits<T>::max() - numeric_limits<T>::min()` diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt new file mode 100644 index 0000000000000000000000000000000000000000..243922d96943d2c10a32d41eca1908124f92c3ce --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalNumaMapAndBatchDataset.pbtxt @@ -0,0 +1,58 @@ +op { + graph_op_name: "ExperimentalNumaMapAndBatchDataset" + visibility: HIDDEN + in_arg { + name: "input_dataset" + description: <